From 2c4e0d2fb8e81b9ce565acea0fdd03aa5747aa70 Mon Sep 17 00:00:00 2001 From: Hadley King Date: Mon, 20 May 2024 10:07:41 -0400 Subject: [PATCH] Model refactor (#280) * Change `bcodb` directory to `config` directory Changes to be committed: modified: .gitignore deleted: bcodb/settings.py renamed: bcodb/__init__.py -> config/__init__.py renamed: bcodb/asgi.py -> config/asgi.py renamed: bcodb/fixtures/local_data.json -> config/fixtures/local_data.json renamed: bcodb/fixtures/test_portal.json -> config/fixtures/test_portal.json renamed: bcodb/urls.py -> config/urls.py renamed: bcodb/wsgi.py -> config/wsgi.py modified: manage.py * Create NewUser model and remove new_user model changed model behind `/api/accounts/new/` Changes to be committed: modified: api/admin.py new file: api/migrations/0006_delete_new_users.py modified: api/models.py deleted: api/scripts/method_specific/POST_api_accounts_new.py modified: api/urls.py modified: api/views.py modified: authentication/admin.py modified: authentication/apis.py new file: authentication/migrations/0002_newuser.py modified: authentication/models.py modified: authentication/selectors.py modified: authentication/services.py modified: tests/fixtures/test_data.json modified: tests/test_views/test_api_account_activate.py modified: tests/test_views/test_api_account_new.py * Add example values to test account request Changes to be committed: modified: authentication/apis.py new file: docs/refactor.md * Account activation has been enabled. On branch account_authorization Changes to be committed: modified: api/urls.py modified: api/views.py modified: authentication/apis.py modified: authentication/selectors.py modified: authentication/services.py modified: authentication/urls.py * Account authorization (#277) * Account activation has been enabled. 
On branch account_authorization Changes to be committed: modified: api/urls.py modified: api/views.py modified: authentication/apis.py modified: authentication/selectors.py modified: authentication/services.py modified: authentication/urls.py * get_new_user * Remove API app Changes to be committed: deleted: api/__init__.py deleted: api/admin.py deleted: api/apps.py deleted: api/fixtures/bootstrap.json deleted: api/fixtures/metafixtures deleted: api/fixtures/metafixtures.json deleted: api/keys.sh deleted: api/migrations/0001_initial.py deleted: api/migrations/0002_auto_20220124_2356.py deleted: api/migrations/0003_rename_meta_table_prefix_table.py deleted: api/migrations/0004_rename_group_info_groupinfo.py deleted: api/migrations/0005_rename_prefixes_prefix.py deleted: api/migrations/0006_delete_new_users.py deleted: api/migrations/__init__.py deleted: api/model/__init__.py deleted: api/model/groups.py deleted: api/model/prefix.py deleted: api/models.py deleted: api/permissions.py deleted: api/rdb.sh deleted: api/request_definitions/GET.schema deleted: api/request_definitions/POST.schema deleted: api/request_definitions/templates/DELETE_delete_object_by_id.schema deleted: api/request_definitions/templates/GET_activate_account.schema deleted: api/request_definitions/templates/GET_get_object_by_id.schema deleted: api/request_definitions/templates/GET_retrieve_available_schema.schema deleted: api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema deleted: api/request_definitions/templates/POST_convert_payload_to_schema.schema deleted: api/request_definitions/templates/POST_new_account.schema deleted: api/request_definitions/templates/POST_object_listing_by_token.schema deleted: api/request_definitions/templates/POST_objects_draft.schema deleted: api/request_definitions/templates/POST_objects_publish.schema deleted: api/request_definitions/templates/POST_read_object.schema deleted: 
api/request_definitions/templates/POST_validate_payload_against_schema.schema deleted: api/scripts/__init__.py deleted: api/scripts/method_specific/GET_draft_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id_with_version.py deleted: api/scripts/method_specific/GET_retrieve_available_schema.py deleted: api/scripts/method_specific/POST_api_objects_drafts_create.py deleted: api/scripts/method_specific/POST_api_objects_drafts_delete.py deleted: api/scripts/method_specific/POST_api_objects_drafts_modify.py deleted: api/scripts/method_specific/POST_api_objects_drafts_permissions.py deleted: api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py deleted: api/scripts/method_specific/POST_api_objects_drafts_publish.py deleted: api/scripts/method_specific/POST_api_objects_drafts_read.py deleted: api/scripts/method_specific/POST_api_objects_drafts_token.py deleted: api/scripts/method_specific/POST_api_objects_publish.py deleted: api/scripts/method_specific/POST_api_objects_published.py deleted: api/scripts/method_specific/POST_api_objects_search.py deleted: api/scripts/method_specific/POST_api_objects_token.py deleted: api/scripts/method_specific/POST_validate_payload_against_schema.py deleted: api/scripts/method_specific/__init__.py deleted: api/scripts/utilities/DbUtils.py deleted: api/scripts/utilities/FileUtils.py deleted: api/scripts/utilities/JsonUtils.py deleted: api/scripts/utilities/RequestUtils.py deleted: api/scripts/utilities/ResponseUtils.py deleted: api/scripts/utilities/SettingsUtils.py deleted: api/scripts/utilities/UserUtils.py deleted: api/scripts/utilities/__init__.py deleted: api/serializers.py deleted: api/signals.py deleted: api/templates/api/account_activation_message.html deleted: api/urls.py deleted: api/validation_definitions/IEEE/2791object.json deleted: api/validation_definitions/IEEE/description_domain.json deleted: 
api/validation_definitions/IEEE/error_domain.json deleted: api/validation_definitions/IEEE/execution_domain.json deleted: api/validation_definitions/IEEE/io_domain.json deleted: api/validation_definitions/IEEE/parametric_domain.json deleted: api/validation_definitions/IEEE/provenance_domain.json deleted: api/validation_definitions/IEEE/usability_domain.json deleted: api/validation_definitions/IEEE_sub/IEEE2791-2020.schema deleted: api/validation_definitions/IEEE_sub/domains/description_domain.json deleted: api/validation_definitions/IEEE_sub/domains/error_domain.json deleted: api/validation_definitions/IEEE_sub/domains/execution_domain.json deleted: api/validation_definitions/IEEE_sub/domains/io_domain.json deleted: api/validation_definitions/IEEE_sub/domains/parametric_domain.json deleted: api/validation_definitions/IEEE_sub/domains/provenance_domain.json deleted: api/validation_definitions/IEEE_sub/domains/usability_domain.json deleted: api/validation_definitions/uri_external deleted: api/views.py new file: config/settings.py modified: config/urls.py Changes not staged for commit: modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py modified: docs/refactor.md modified: search/selectors.py modified: tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_accounts_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: 
tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: tests/test_views/test_published_object_by_id.py modified: token.json * Framework for BioCompute model Changes to be committed: new file: biocompute/__init__.py new file: biocompute/admin.py new file: biocompute/apis.py new file: biocompute/migrations/__init__.py new file: biocompute/models.py new file: biocompute/selectors.py new file: biocompute/services.py new file: biocompute/urls.py * Added files for Prefix model Changes to be committed: new file: prefix/__init__.py new file: prefix/admin.py new file: prefix/apis.py new file: prefix/apps.py new file: prefix/migrations/__init__.py new file: prefix/models.py new file: prefix/selectors.py new file: prefix/services.py new file: prefix/urls.py * Tests for authentication Changes to be committed: modified: .gitignore modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py new file: biocompute/migrations/0001_initial.py modified: docs/refactor.md new file: prefix/migrations/0001_initial.py modified: search/selectors.py new file: test.json new file: tests/fixtures/old_test_data.json modified: tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py renamed: tests/test_views/test_api_accounts_describe.py -> tests/test_views/test_api_account_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: 
tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: tests/test_views/test_published_object_by_id.py modified: token.json * Swagger move (#283) * Remove API app Changes to be committed: deleted: api/__init__.py deleted: api/admin.py deleted: api/apps.py deleted: api/fixtures/bootstrap.json deleted: api/fixtures/metafixtures deleted: api/fixtures/metafixtures.json deleted: api/keys.sh deleted: api/migrations/0001_initial.py deleted: api/migrations/0002_auto_20220124_2356.py deleted: api/migrations/0003_rename_meta_table_prefix_table.py deleted: api/migrations/0004_rename_group_info_groupinfo.py deleted: api/migrations/0005_rename_prefixes_prefix.py deleted: api/migrations/0006_delete_new_users.py deleted: api/migrations/__init__.py deleted: api/model/__init__.py deleted: api/model/groups.py deleted: api/model/prefix.py deleted: api/models.py deleted: api/permissions.py deleted: api/rdb.sh deleted: api/request_definitions/GET.schema deleted: api/request_definitions/POST.schema deleted: api/request_definitions/templates/DELETE_delete_object_by_id.schema deleted: api/request_definitions/templates/GET_activate_account.schema deleted: api/request_definitions/templates/GET_get_object_by_id.schema deleted: api/request_definitions/templates/GET_retrieve_available_schema.schema deleted: api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema deleted: 
api/request_definitions/templates/POST_convert_payload_to_schema.schema deleted: api/request_definitions/templates/POST_new_account.schema deleted: api/request_definitions/templates/POST_object_listing_by_token.schema deleted: api/request_definitions/templates/POST_objects_draft.schema deleted: api/request_definitions/templates/POST_objects_publish.schema deleted: api/request_definitions/templates/POST_read_object.schema deleted: api/request_definitions/templates/POST_validate_payload_against_schema.schema deleted: api/scripts/__init__.py deleted: api/scripts/method_specific/GET_draft_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id_with_version.py deleted: api/scripts/method_specific/GET_retrieve_available_schema.py deleted: api/scripts/method_specific/POST_api_objects_drafts_create.py deleted: api/scripts/method_specific/POST_api_objects_drafts_delete.py deleted: api/scripts/method_specific/POST_api_objects_drafts_modify.py deleted: api/scripts/method_specific/POST_api_objects_drafts_permissions.py deleted: api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py deleted: api/scripts/method_specific/POST_api_objects_drafts_publish.py deleted: api/scripts/method_specific/POST_api_objects_drafts_read.py deleted: api/scripts/method_specific/POST_api_objects_drafts_token.py deleted: api/scripts/method_specific/POST_api_objects_publish.py deleted: api/scripts/method_specific/POST_api_objects_published.py deleted: api/scripts/method_specific/POST_api_objects_search.py deleted: api/scripts/method_specific/POST_api_objects_token.py deleted: api/scripts/method_specific/POST_validate_payload_against_schema.py deleted: api/scripts/method_specific/__init__.py deleted: api/scripts/utilities/DbUtils.py deleted: api/scripts/utilities/FileUtils.py deleted: api/scripts/utilities/JsonUtils.py deleted: api/scripts/utilities/RequestUtils.py deleted: 
api/scripts/utilities/ResponseUtils.py deleted: api/scripts/utilities/SettingsUtils.py deleted: api/scripts/utilities/UserUtils.py deleted: api/scripts/utilities/__init__.py deleted: api/serializers.py deleted: api/signals.py deleted: api/templates/api/account_activation_message.html deleted: api/urls.py deleted: api/validation_definitions/IEEE/2791object.json deleted: api/validation_definitions/IEEE/description_domain.json deleted: api/validation_definitions/IEEE/error_domain.json deleted: api/validation_definitions/IEEE/execution_domain.json deleted: api/validation_definitions/IEEE/io_domain.json deleted: api/validation_definitions/IEEE/parametric_domain.json deleted: api/validation_definitions/IEEE/provenance_domain.json deleted: api/validation_definitions/IEEE/usability_domain.json deleted: api/validation_definitions/IEEE_sub/IEEE2791-2020.schema deleted: api/validation_definitions/IEEE_sub/domains/description_domain.json deleted: api/validation_definitions/IEEE_sub/domains/error_domain.json deleted: api/validation_definitions/IEEE_sub/domains/execution_domain.json deleted: api/validation_definitions/IEEE_sub/domains/io_domain.json deleted: api/validation_definitions/IEEE_sub/domains/parametric_domain.json deleted: api/validation_definitions/IEEE_sub/domains/provenance_domain.json deleted: api/validation_definitions/IEEE_sub/domains/usability_domain.json deleted: api/validation_definitions/uri_external deleted: api/views.py new file: config/settings.py modified: config/urls.py Changes not staged for commit: modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py modified: docs/refactor.md modified: search/selectors.py modified: tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py deleted: 
tests/test_views/test_api_accounts_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: tests/test_views/test_published_object_by_id.py modified: token.json * Framework for BioCompute model Changes to be committed: new file: biocompute/__init__.py new file: biocompute/admin.py new file: biocompute/apis.py new file: biocompute/migrations/__init__.py new file: biocompute/models.py new file: biocompute/selectors.py new file: biocompute/services.py new file: biocompute/urls.py * Added files for Prefix model Changes to be committed: new file: prefix/__init__.py new file: prefix/admin.py new file: prefix/apis.py new file: prefix/apps.py new file: prefix/migrations/__init__.py new file: prefix/models.py new file: prefix/selectors.py new file: prefix/services.py new file: prefix/urls.py * Tests for authentication Changes to be committed: modified: .gitignore modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py new file: biocompute/migrations/0001_initial.py modified: docs/refactor.md new file: prefix/migrations/0001_initial.py modified: search/selectors.py new file: test.json new file: tests/fixtures/old_test_data.json modified: 
tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py renamed: tests/test_views/test_api_accounts_describe.py -> tests/test_views/test_api_account_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: tests/test_views/test_published_object_by_id.py modified: token.json * Move Swagger files and implement BCO draft create Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/services.py modified: biocompute/urls.py new file: config/services.py modified: config/urls.py modified: docs/refactor.md modified: prefix/urls.py new file: tests/fixtures/example_bco.py new file: tests/test_views/test_api_objects_drafts_create.py * Doc fix in Bco.models Changes to be committed: modified: biocompute/models.py * Biocompute model (#284) * Remove API app Changes to be committed: deleted: api/__init__.py deleted: api/admin.py deleted: api/apps.py deleted: api/fixtures/bootstrap.json deleted: api/fixtures/metafixtures deleted: api/fixtures/metafixtures.json deleted: api/keys.sh deleted: api/migrations/0001_initial.py deleted: api/migrations/0002_auto_20220124_2356.py deleted: 
api/migrations/0003_rename_meta_table_prefix_table.py deleted: api/migrations/0004_rename_group_info_groupinfo.py deleted: api/migrations/0005_rename_prefixes_prefix.py deleted: api/migrations/0006_delete_new_users.py deleted: api/migrations/__init__.py deleted: api/model/__init__.py deleted: api/model/groups.py deleted: api/model/prefix.py deleted: api/models.py deleted: api/permissions.py deleted: api/rdb.sh deleted: api/request_definitions/GET.schema deleted: api/request_definitions/POST.schema deleted: api/request_definitions/templates/DELETE_delete_object_by_id.schema deleted: api/request_definitions/templates/GET_activate_account.schema deleted: api/request_definitions/templates/GET_get_object_by_id.schema deleted: api/request_definitions/templates/GET_retrieve_available_schema.schema deleted: api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema deleted: api/request_definitions/templates/POST_convert_payload_to_schema.schema deleted: api/request_definitions/templates/POST_new_account.schema deleted: api/request_definitions/templates/POST_object_listing_by_token.schema deleted: api/request_definitions/templates/POST_objects_draft.schema deleted: api/request_definitions/templates/POST_objects_publish.schema deleted: api/request_definitions/templates/POST_read_object.schema deleted: api/request_definitions/templates/POST_validate_payload_against_schema.schema deleted: api/scripts/__init__.py deleted: api/scripts/method_specific/GET_draft_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id.py deleted: api/scripts/method_specific/GET_published_object_by_id_with_version.py deleted: api/scripts/method_specific/GET_retrieve_available_schema.py deleted: api/scripts/method_specific/POST_api_objects_drafts_create.py deleted: api/scripts/method_specific/POST_api_objects_drafts_delete.py deleted: api/scripts/method_specific/POST_api_objects_drafts_modify.py deleted: 
api/scripts/method_specific/POST_api_objects_drafts_permissions.py deleted: api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py deleted: api/scripts/method_specific/POST_api_objects_drafts_publish.py deleted: api/scripts/method_specific/POST_api_objects_drafts_read.py deleted: api/scripts/method_specific/POST_api_objects_drafts_token.py deleted: api/scripts/method_specific/POST_api_objects_publish.py deleted: api/scripts/method_specific/POST_api_objects_published.py deleted: api/scripts/method_specific/POST_api_objects_search.py deleted: api/scripts/method_specific/POST_api_objects_token.py deleted: api/scripts/method_specific/POST_validate_payload_against_schema.py deleted: api/scripts/method_specific/__init__.py deleted: api/scripts/utilities/DbUtils.py deleted: api/scripts/utilities/FileUtils.py deleted: api/scripts/utilities/JsonUtils.py deleted: api/scripts/utilities/RequestUtils.py deleted: api/scripts/utilities/ResponseUtils.py deleted: api/scripts/utilities/SettingsUtils.py deleted: api/scripts/utilities/UserUtils.py deleted: api/scripts/utilities/__init__.py deleted: api/serializers.py deleted: api/signals.py deleted: api/templates/api/account_activation_message.html deleted: api/urls.py deleted: api/validation_definitions/IEEE/2791object.json deleted: api/validation_definitions/IEEE/description_domain.json deleted: api/validation_definitions/IEEE/error_domain.json deleted: api/validation_definitions/IEEE/execution_domain.json deleted: api/validation_definitions/IEEE/io_domain.json deleted: api/validation_definitions/IEEE/parametric_domain.json deleted: api/validation_definitions/IEEE/provenance_domain.json deleted: api/validation_definitions/IEEE/usability_domain.json deleted: api/validation_definitions/IEEE_sub/IEEE2791-2020.schema deleted: api/validation_definitions/IEEE_sub/domains/description_domain.json deleted: api/validation_definitions/IEEE_sub/domains/error_domain.json deleted: 
api/validation_definitions/IEEE_sub/domains/execution_domain.json deleted: api/validation_definitions/IEEE_sub/domains/io_domain.json deleted: api/validation_definitions/IEEE_sub/domains/parametric_domain.json deleted: api/validation_definitions/IEEE_sub/domains/provenance_domain.json deleted: api/validation_definitions/IEEE_sub/domains/usability_domain.json deleted: api/validation_definitions/uri_external deleted: api/views.py new file: config/settings.py modified: config/urls.py Changes not staged for commit: modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py modified: docs/refactor.md modified: search/selectors.py modified: tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_accounts_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: tests/test_views/test_published_object_by_id.py modified: token.json * Framework for BioCompute model Changes to be committed: new file: biocompute/__init__.py new file: biocompute/admin.py new file: biocompute/apis.py new file: 
biocompute/migrations/__init__.py new file: biocompute/models.py new file: biocompute/selectors.py new file: biocompute/services.py new file: biocompute/urls.py * Added files for Prefix model Changes to be committed: new file: prefix/__init__.py new file: prefix/admin.py new file: prefix/apis.py new file: prefix/apps.py new file: prefix/migrations/__init__.py new file: prefix/models.py new file: prefix/selectors.py new file: prefix/services.py new file: prefix/urls.py * Tests for authentication Changes to be committed: modified: .gitignore modified: authentication/apis.py modified: authentication/migrations/0001_initial.py deleted: authentication/migrations/0002_newuser.py modified: authentication/services.py new file: biocompute/migrations/0001_initial.py modified: docs/refactor.md new file: prefix/migrations/0001_initial.py modified: search/selectors.py new file: test.json new file: tests/fixtures/old_test_data.json modified: tests/fixtures/test_data.json deleted: tests/test_database.py deleted: tests/test_fixtures.py deleted: tests/test_models modified: tests/test_views/test_api_account_activate.py renamed: tests/test_views/test_api_accounts_describe.py -> tests/test_views/test_api_account_describe.py modified: tests/test_views/test_api_auth_add.py modified: tests/test_views/test_api_auth_reset_token.py deleted: tests/test_views/test_api_groups_group_info.py deleted: tests/test_views/test_api_groups_modify.py deleted: tests/test_views/test_api_objects.py deleted: tests/test_views/test_api_objects_drafts_create.py deleted: tests/test_views/test_api_objects_drafts_modify.py deleted: tests/test_views/test_api_objects_drafts_publish.py deleted: tests/test_views/test_api_objects_search.py deleted: tests/test_views/test_api_objects_validate.py deleted: tests/test_views/test_api_prefixes_create.py deleted: tests/test_views/test_api_prefixes_token.py deleted: tests/test_views/test_get_object_id_draft.py deleted: tests/test_views/test_get_objectid.py deleted: 
tests/test_views/test_published_object_by_id.py modified: token.json * Move Swagger files and implement BCO draft create Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/services.py modified: biocompute/urls.py new file: config/services.py modified: config/urls.py modified: docs/refactor.md modified: prefix/urls.py new file: tests/fixtures/example_bco.py new file: tests/test_views/test_api_objects_drafts_create.py * Doc fix in Bco.models Changes to be committed: modified: biocompute/models.py * Updated biocompute.model and prefix.model Added the DraftsCreateApi and tests. This included serializers for BCO creation Changes to be committed: modified: biocompute/apis.py modified: biocompute/services.py modified: config/services.py modified: prefix/models.py modified: prefix/services.py modified: test.json modified: tests/fixtures/test_data.json new file: tests/test_views/test_api_objects_drafts_create.py * Implemented PrefixesCreateApi Changes to be committed: modified: biocompute/apis.py modified: config/fixtures/local_data.json modified: config/services.py modified: config/urls.py modified: prefix/apis.py modified: prefix/services.py modified: prefix/urls.py modified: search/apis.py modified: tests/test_views/test_api_auth_remove.py modified: tests/test_views/test_api_objects_drafts_create.py new file: tests/test_views/test_api_prefixes_create.py * Added Prefix management URLs and APIs Changes to be committed: modified: config/fixtures/local_data.json modified: docs/refactor.md modified: prefix/apis.py modified: prefix/services.py modified: prefix/urls.py modified: tests/fixtures/test_data.json deleted: tests/test_views/test_api_prefixes_create.py new file: tests/test_views/test_prefixes_create.py new file: tests/test_views/test_prefixes_modify.py * Cleaning up some things Changes to be committed: modified: biocompute/apis.py modified: biocompute/migrations/0001_initial.py modified: prefix/apis.py modified: 
prefix/migrations/0001_initial.py new file: tests/test_views/test_account_activate.py new file: tests/test_views/test_account_describe.py new file: tests/test_views/test_account_new.py new file: tests/test_views/test_auth_add.py new file: tests/test_views/test_auth_remove.py new file: tests/test_views/test_auth_reset_token.py new file: tests/test_views/test_objects_drafts_create.py * Add prefix creation function Changes to be committed: modified: authentication/services.py modified: biocompute/models.py modified: config/settings.py modified: docs/refactor.md modified: prefix/apis.py modified: prefix/models.py modified: prefix/services.py modified: tests/test_views/test_api_objects_drafts_create.py * Add Prefix Modify API endpoint On branch prefix_perms Your branch is up to date with 'origin/prefix_perms'. Changes to be committed: modified: config/services.py modified: prefix/apis.py modified: prefix/models.py modified: prefix/selectors.py modified: prefix/services.py * response_c * Add `DraftRetrieveApi` To do this, functions related to Prefixes and the associated permissions were created Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: config/asgi.py modified: config/fixtures/local_data.json modified: config/services.py modified: config/urls.py modified: prefix/selectors.py modified: prefix/services.py deleted: test.json modified: tests/fixtures/example_bco.py modified: tests/fixtures/test_data.json deleted: token.json * Update testing functions Created subdirectory and moved test functions Changes to be committed: renamed: tests/test_views/__init__.py -> tests/test_apis/__init__.py new file: tests/test_apis/test_api_authentication/__init__.py renamed: tests/test_views/test_account_activate.py -> tests/test_apis/test_api_authentication/test_account_activate.py renamed: tests/test_views/test_account_describe.py -> 
tests/test_apis/test_api_authentication/test_account_describe.py renamed: tests/test_views/test_account_new.py -> tests/test_apis/test_api_authentication/test_account_new.py renamed: tests/test_views/test_api_auth_add.py -> tests/test_apis/test_api_authentication/test_api_auth_add.py renamed: tests/test_views/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/test_api_auth_remove.py renamed: tests/test_views/test_api_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_views/test_api_objects_drafts_create.py -> tests/test_apis/test_api_objects_drafts_create.py renamed: tests/test_views/test_auth_add.py -> tests/test_apis/test_auth_add.py renamed: tests/test_views/test_auth_remove.py -> tests/test_apis/test_auth_remove.py renamed: tests/test_views/test_auth_reset_token.py -> tests/test_apis/test_auth_reset_token.py renamed: tests/test_views/test_objects_drafts_create.py -> tests/test_apis/test_objects_drafts_create.py renamed: tests/test_views/test_prefixes_create.py -> tests/test_apis/test_prefixes_create.py new file: tests/test_apis/test_prefixes_modify.py deleted: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_account_describe.py deleted: tests/test_views/test_api_account_new.py deleted: tests/test_views/test_prefixes_modify.py * Add `DraftRetrieveApi` To do this, functions related to Prefixes and the associated permissions were created Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: config/asgi.py modified: config/fixtures/local_data.json modified: config/services.py modified: config/urls.py modified: prefix/selectors.py modified: prefix/services.py deleted: test.json modified: tests/fixtures/example_bco.py modified: tests/fixtures/test_data.json deleted: token.json * Update testing functions Created subdirectory 
and moved test functions Changes to be committed: renamed: tests/test_views/__init__.py -> tests/test_apis/__init__.py new file: tests/test_apis/test_api_authentication/__init__.py renamed: tests/test_views/test_account_activate.py -> tests/test_apis/test_api_authentication/test_account_activate.py renamed: tests/test_views/test_account_describe.py -> tests/test_apis/test_api_authentication/test_account_describe.py renamed: tests/test_views/test_account_new.py -> tests/test_apis/test_api_authentication/test_account_new.py renamed: tests/test_views/test_api_auth_add.py -> tests/test_apis/test_api_authentication/test_api_auth_add.py renamed: tests/test_views/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/test_api_auth_remove.py renamed: tests/test_views/test_api_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_views/test_api_objects_drafts_create.py -> tests/test_apis/test_api_objects_drafts_create.py renamed: tests/test_views/test_auth_add.py -> tests/test_apis/test_auth_add.py renamed: tests/test_views/test_auth_remove.py -> tests/test_apis/test_auth_remove.py renamed: tests/test_views/test_auth_reset_token.py -> tests/test_apis/test_auth_reset_token.py renamed: tests/test_views/test_objects_drafts_create.py -> tests/test_apis/test_objects_drafts_create.py renamed: tests/test_views/test_prefixes_create.py -> tests/test_apis/test_prefixes_create.py new file: tests/test_apis/test_prefixes_modify.py deleted: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_account_describe.py deleted: tests/test_views/test_api_account_new.py deleted: tests/test_views/test_prefixes_modify.py * Update to Test functions Changes to be committed: modified: biocompute/migrations/0001_initial.py modified: docs/refactor.md modified: prefix/apis.py modified: prefix/migrations/0001_initial.py modified: prefix/services.py new file: tests/fixtures/bco_dump.json modified: 
tests/fixtures/test_data.json deleted: tests/test_apis/test_api_authentication/test_api_auth_add.py deleted: tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_apis/test_auth_add.py -> tests/test_apis/test_api_authentication/test_auth_add.py renamed: tests/test_apis/test_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_auth_reset_token.py renamed: tests/test_apis/test_api_authentication/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/testi_auth_remove.py deleted: tests/test_apis/test_api_objects_drafts_create.py new file: tests/test_apis/test_api_prefix/__init__.py renamed: tests/test_apis/test_prefixes_create.py -> tests/test_apis/test_api_prefix/test_prefixes_create.py renamed: tests/test_apis/test_prefixes_modify.py -> tests/test_apis/test_api_prefix/test_prefixes_modify.py deleted: tests/test_apis/test_auth_remove.py new file: tests/test_apis/test_biocompute/__init__.py renamed: tests/test_apis/test_objects_drafts_create.py -> tests/test_apis/test_biocompute/test_objects_drafts_create.py * Bco perms (#293) * Add `DraftRetrieveApi` To do this functions related to Prefixes and the associated permissions were created Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: config/asgi.py modified: config/fixtures/local_data.json modified: config/services.py modified: config/urls.py modified: prefix/selectors.py modified: prefix/services.py deleted: test.json modified: tests/fixtures/example_bco.py modified: tests/fixtures/test_data.json deleted: token.json * Update testing functions Created subdirectory and moved test functions Changes to be committed: renamed: tests/test_views/__init__.py -> tests/test_apis/__init__.py new file: tests/test_apis/test_api_authentication/__init__.py renamed: tests/test_views/test_account_activate.py -> 
tests/test_apis/test_api_authentication/test_account_activate.py renamed: tests/test_views/test_account_describe.py -> tests/test_apis/test_api_authentication/test_account_describe.py renamed: tests/test_views/test_account_new.py -> tests/test_apis/test_api_authentication/test_account_new.py renamed: tests/test_views/test_api_auth_add.py -> tests/test_apis/test_api_authentication/test_api_auth_add.py renamed: tests/test_views/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/test_api_auth_remove.py renamed: tests/test_views/test_api_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_views/test_api_objects_drafts_create.py -> tests/test_apis/test_api_objects_drafts_create.py renamed: tests/test_views/test_auth_add.py -> tests/test_apis/test_auth_add.py renamed: tests/test_views/test_auth_remove.py -> tests/test_apis/test_auth_remove.py renamed: tests/test_views/test_auth_reset_token.py -> tests/test_apis/test_auth_reset_token.py renamed: tests/test_views/test_objects_drafts_create.py -> tests/test_apis/test_objects_drafts_create.py renamed: tests/test_views/test_prefixes_create.py -> tests/test_apis/test_prefixes_create.py new file: tests/test_apis/test_prefixes_modify.py deleted: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_account_describe.py deleted: tests/test_views/test_api_account_new.py deleted: tests/test_views/test_prefixes_modify.py * Update to Test functions Changes to be committed: modified: biocompute/migrations/0001_initial.py modified: docs/refactor.md modified: prefix/apis.py modified: prefix/migrations/0001_initial.py modified: prefix/services.py new file: tests/fixtures/bco_dump.json modified: tests/fixtures/test_data.json deleted: tests/test_apis/test_api_authentication/test_api_auth_add.py deleted: tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_apis/test_auth_add.py -> 
tests/test_apis/test_api_authentication/test_auth_add.py renamed: tests/test_apis/test_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_auth_reset_token.py renamed: tests/test_apis/test_api_authentication/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/testi_auth_remove.py deleted: tests/test_apis/test_api_objects_drafts_create.py new file: tests/test_apis/test_api_prefix/__init__.py renamed: tests/test_apis/test_prefixes_create.py -> tests/test_apis/test_api_prefix/test_prefixes_create.py renamed: tests/test_apis/test_prefixes_modify.py -> tests/test_apis/test_api_prefix/test_prefixes_modify.py deleted: tests/test_apis/test_auth_remove.py new file: tests/test_apis/test_biocompute/__init__.py renamed: tests/test_apis/test_objects_drafts_create.py -> tests/test_apis/test_biocompute/test_objects_drafts_create.py * Add `access_count` incrementer to DraftRetrieveApi Changes to be committed: modified: biocompute/apis.py modified: biocompute/services.py modified: prefix/services.py * Added PublishedRetrieveApi class Changes to be committed: modified: biocompute/apis.py modified: biocompute/selectors.py modified: config/urls.py * Update test DB and testing functions Changes to be committed: modified: config/fixtures/local_data.json modified: tests/fixtures/test_data.json deleted: tests/test_apis/test_api_objects_drafts_create.py deleted: tests/test_apis/test_auth_add.py deleted: tests/test_apis/test_auth_remove.py deleted: tests/test_apis/test_auth_reset_token.py renamed: tests/test_apis/test_objects_drafts_create.py -> tests/test_apis/test_biocompute/objects_drafts_create.py deleted: tests/test_apis/test_prefixes_create.py deleted: tests/test_apis/test_prefixes_modify.py * Add DraftsModifyApi Changes to be committed: modified: biocompute/apis.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: prefix/selectors.py deleted: tests/test_apis/test_biocompute/objects_drafts_create.py 
deleted: tests/test_apis/test_biocompute/test_objects_drafts_create.py * test for objects_drafts_create Changes to be committed: new file: tests/test_apis/test_biocompute/test_objects_drafts_create.py * split("/") * Publish draft (#308) * Add DraftsModifyApi Changes to be committed: modified: biocompute/apis.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: prefix/selectors.py deleted: tests/test_apis/test_biocompute/objects_drafts_create.py deleted: tests/test_apis/test_biocompute/test_objects_drafts_create.py * Add IEEE schema Changes to be committed: new file: .secrets new file: config/IEEE/2791object.json new file: config/IEEE/description_domain.json new file: config/IEEE/error_domain.json new file: config/IEEE/execution_domain.json new file: config/IEEE/io_domain.json new file: config/IEEE/parametric_domain.json new file: config/IEEE/provenance_domain.json new file: config/IEEE/usability_domain.json * test_objects_drafts_create Changes to be committed: new file: tests/test_apis/test_biocompute/test_objects_drafts_create.py * Fix test Changes to be committed: modified: tests/test_apis/test_biocompute/test_objects_drafts_create.py * Enable publish bco endpoint This included a great many supporting functions and permission checks to accomplish. Also added the validate functions to the codebase Changes to be committed: modified: biocompute/apis.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: config/services.py modified: docs/refactor.md modified: prefix/selectors.py new file: tests/test_apis/test_biocompute/test_objects_drafts_publish.py * Add BCO Score to model. Also added functions and methods for BCO Score. Updates to the BCO admin panel. 
Updates to the `test_data.json` and `local_data.json` Fix # 310 Changes to be committed: modified: authentication/migrations/0001_initial.py modified: biocompute/admin.py modified: biocompute/apis.py modified: biocompute/migrations/0001_initial.py modified: biocompute/models.py modified: biocompute/services.py new file: config/bco_scores.py modified: config/fixtures/local_data.json modified: tests/fixtures/test_data.json * Add BCODB search API endpoint. Changes to be committed: modified: biocompute/apis.py modified: search/apis.py modified: search/selectors.py modified: search/urls.py * Add POST search Changes to be committed: modified: authentication/apis.py modified: search/apis.py * Updated config file to `.secrets` also updated accounts/describe Changes to be committed: modified: .gitignore new file: .secrets.example modified: authentication/selectors.py modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/selectors.py modified: config/settings.py deleted: server.conf deleted: sever.conf.example * Fix config/urls.py error Changes to be committed: deleted: .secrets modified: config/settings.py modified: config/urls.py * Refactor `config/services.py::response_constructor` Changes to be committed: modified: config/services.py modified: tests/test_apis/test_api_prefix/test_prefixes_create.py * Prefix registry (#318) * Created test_objects_drafts_modify, hasn't passed test yet * Created test_objects_drafts_modify, hasn't passed test yet * Created test_objects_drafts_modify, hasn't passed test yet * new tests * Update modify_prefix permissions Changes to be committed: modified: authentication/selectors.py modified: config/fixtures/local_data.json modified: prefix/services.py * Created and tested test_objects_drafts_modify API * Fixes for Prefix creation Changes to be committed: modified: config/services.py modified: prefix/apis.py modified: tests/test_apis/test_api_prefix/test_prefixes_create.py * Add UserSearch endpoint Changes to be 
committed: modified: prefix/apis.py modified: prefix/selectors.py modified: search/apis.py modified: search/urls.py * Fixes for deployment Changes to be committed: modified: .secrets.example modified: admin_only/bco_api.service modified: biocompute/apis.py modified: config/fixtures/test_portal.json modified: config/settings.py modified: tests/fixtures/test_data.json modified: tests/test_apis/test_biocompute/test_objects_drafts_create.py * test_objects_drafts_publish failed. Kicking back to Hadley for more investigation * Update bco_api.conf Changes to be committed: modified: admin_only/bco_api.conf --------- Co-authored-by: tianywan819 <57418894+tianywan819@users.noreply.github.com> Co-authored-by: acoleman29 * Fixes for testing files Changes to be committed: modified: biocompute/apis.py modified: biocompute/services.py modified: config/bco_scores.py modified: config/services.py modified: config/settings.py modified: config/urls.py modified: tests/fixtures/test_data.json renamed: tests/fixtures/example_bco.py -> tests/fixtures/testing_bcos.py modified: tests/test_apis/test_api_prefix/test_prefixes_create.py modified: tests/test_apis/test_biocompute/test_objects_drafts_modify.py modified: tests/test_apis/test_biocompute/test_objects_drafts_publish.py * "derrived_from": [] * fix test * Add Validate url endpoint Changes to be committed: modified: biocompute/apis.py modified: biocompute/urls.py modified: tests/test_apis/test_biocompute/test_objects_drafts_create.py modified: tests/test_apis/test_biocompute/test_objects_drafts_publish.py * Update create test to include HOSTNAME Changes to be committed: modified: config/services.py modified: tests/test_apis/test_biocompute/test_objects_drafts_create.py * Rollback status for validation Changes to be committed: modified: biocompute/apis.py * Changes to be committed: modified: biocompute/apis.py * Typo fix Changes to be committed: modified: biocompute/apis.py * Add `test_portal.json` for testing Changes to be committed: 
modified: config/fixtures/test_portal.json * Testing fixes Changes to be committed: modified: admin_only/bco_api.conf modified: config/fixtures/test_portal.json * Fix for draft publish Changes to be committed: modified: config/services.py * Add new_bco_instance.save() Changes to be committed: modified: biocompute/services.py modified: config/services.py * CustomJSONWebTokenAuthentication for GET Pub BCO Changes to be committed: modified: biocompute/apis.py * Updates to authentication.apis.py Changes to be committed: modified: authentication/apis.py modified: authentication/services.py modified: search/apis.py * Updates to biocompute.apis.py Changes to be committed: modified: biocompute/apis.py modified: biocompute/selectors.py modified: search/apis.py modified: tests/fixtures/testing_bcos.py modified: tests/test_apis/test_biocompute/test_objects_drafts_create.py * Remove print statements Changes to be committed: modified: authentication/apis.py modified: authentication/services.py modified: authentication/urls.py modified: biocompute/apis.py modified: biocompute/services.py modified: config/urls.py modified: prefix/services.py modified: tests/test_apis/test_biocompute/test_objects_drafts_publish.py * Updates to prefix.apis Changes to be committed: modified: prefix/apis.py * print( * change publish bco rejected response On branch docs Your branch is up to date with 'origin/docs'. 
--------- Co-authored-by: tianywan819 <57418894+tianywan819@users.noreply.github.com> Co-authored-by: acoleman29 --- .gitignore | 6 +- .secrets.example | 15 + admin_only/bco_api.conf | 21 +- admin_only/bco_api.service | 6 +- api/admin.py | 20 - api/apps.py | 26 - api/fixtures/bootstrap.json | 2058 --- api/fixtures/metafixtures | 58 - api/fixtures/metafixtures.json | 10 - api/keys.sh | 16 - api/migrations/0001_initial.py | 159 - api/migrations/0002_auto_20220124_2356.py | 60 - .../0003_rename_meta_table_prefix_table.py | 17 - .../0004_rename_group_info_groupinfo.py | 20 - api/migrations/0005_rename_prefixes_prefix.py | 20 - api/model/groups.py | 466 - api/model/prefix.py | 745 - api/models.py | 145 - api/permissions.py | 273 - api/rdb.sh | 41 - api/request_definitions/GET.schema | 20 - api/request_definitions/POST.schema | 40 - .../DELETE_delete_object_by_id.schema | 55 - .../templates/GET_activate_account.schema | 25 - .../templates/GET_get_object_by_id.schema | 55 - .../GET_retrieve_available_schema.schema | 13 - ...ert_existing_object_between_schemas.schema | 55 - .../POST_convert_payload_to_schema.schema | 44 - .../templates/POST_new_account.schema | 20 - .../POST_object_listing_by_token.schema | 19 - .../templates/POST_objects_draft.schema | 65 - .../templates/POST_objects_publish.schema | 65 - .../templates/POST_read_object.schema | 33 - ...OST_validate_payload_against_schema.schema | 33 - .../method_specific/GET_activate_account.py | 53 - .../method_specific/GET_draft_object_by_id.py | 73 - .../GET_published_object_by_id.py | 111 - ...GET_published_object_by_id_with_version.py | 114 - .../GET_retrieve_available_schema.py | 49 - .../POST_api_accounts_describe.py | 27 - .../method_specific/POST_api_accounts_new.py | 155 - .../POST_api_objects_drafts_create.py | 166 - .../POST_api_objects_drafts_delete.py | 117 - .../POST_api_objects_drafts_modify.py | 171 - .../POST_api_objects_drafts_permissions.py | 158 - ...POST_api_objects_drafts_permissions_set.py | 242 - 
.../POST_api_objects_drafts_publish.py | 217 - .../POST_api_objects_drafts_read.py | 121 - .../POST_api_objects_drafts_token.py | 238 - .../POST_api_objects_publish.py | 183 - .../POST_api_objects_published.py | 115 - .../POST_api_objects_search.py | 120 - .../method_specific/POST_api_objects_token.py | 25 - .../POST_validate_payload_against_schema.py | 59 - api/scripts/utilities/DbUtils.py | 986 -- api/scripts/utilities/FileUtils.py | 167 - api/scripts/utilities/JsonUtils.py | 308 - api/scripts/utilities/RequestUtils.py | 30 - api/scripts/utilities/ResponseUtils.py | 53 - api/scripts/utilities/SettingsUtils.py | 146 - api/scripts/utilities/UserUtils.py | 268 - api/serializers.py | 27 - api/signals.py | 116 - .../api/account_activation_message.html | 41 - api/urls.py | 163 - .../IEEE_sub/IEEE2791-2020.schema | 178 - .../IEEE_sub/domains/description_domain.json | 165 - .../IEEE_sub/domains/error_domain.json | 24 - .../IEEE_sub/domains/execution_domain.json | 111 - .../IEEE_sub/domains/io_domain.json | 58 - .../IEEE_sub/domains/parametric_domain.json | 42 - .../IEEE_sub/domains/provenance_domain.json | 126 - .../IEEE_sub/domains/usability_domain.json | 16 - api/views.py | 1824 --- authentication/admin.py | 11 +- authentication/apis.py | 484 +- authentication/migrations/0001_initial.py | 14 +- authentication/models.py | 22 +- authentication/selectors.py | 58 +- authentication/services.py | 203 +- authentication/urls.py | 21 +- bcodb/fixtures/local_data.json | 6544 -------- bcodb/fixtures/test_portal.json | 6544 -------- bcodb/settings.py | 288 - bcodb/urls.py | 16 - {api => biocompute}/__init__.py | 0 biocompute/admin.py | 44 + biocompute/apis.py | 705 + biocompute/migrations/0001_initial.py | 32 + {api => biocompute}/migrations/__init__.py | 0 biocompute/models.py | 76 + biocompute/selectors.py | 271 + biocompute/services.py | 621 + biocompute/urls.py | 18 + .../IEEE/2791object.json | 0 .../IEEE/description_domain.json | 0 .../IEEE/error_domain.json | 0 
.../IEEE/execution_domain.json | 0 .../IEEE/io_domain.json | 0 .../IEEE/parametric_domain.json | 0 .../IEEE/provenance_domain.json | 0 .../IEEE/usability_domain.json | 0 {api/model => config}/__init__.py | 0 {bcodb => config}/asgi.py | 2 +- config/bco_scores.py | 56 + config/fixtures/local_data.json | 4409 ++++++ config/fixtures/test_portal.json | 4409 ++++++ config/services.py | 124 + config/settings.py | 184 + config/urls.py | 58 + {bcodb => config}/wsgi.py | 2 +- docs/bco_scores.json | 13216 ++++++++++++++++ docs/refactor.md | 68 + manage.py | 2 +- {api/scripts => prefix}/__init__.py | 0 prefix/admin.py | 7 + prefix/apis.py | 522 + .../__init__.py => prefix/apps.py | 0 prefix/migrations/0001_initial.py | 30 + .../migrations}/__init__.py | 0 prefix/models.py | 34 + prefix/selectors.py | 156 + prefix/services.py | 286 + prefix/urls.py | 22 + search/apis.py | 185 +- search/selectors.py | 95 +- search/urls.py | 6 +- server.conf | 74 - sever.conf.example | 74 - tests/fixtures/bco_dump.json | 5216 ++++++ tests/fixtures/old_test_data.json | 7030 ++++++++ tests/fixtures/test_data.json | 11271 +++++-------- tests/fixtures/testing_bcos.py | 1013 ++ {bcodb => tests/test_apis}/__init__.py | 0 .../test_api_authentication}/__init__.py | 0 .../test_account_activate.py | 50 + .../test_account_describe.py} | 0 .../test_account_new.py} | 10 +- .../test_api_auth_add.py | 4 +- .../test_api_auth_remove.py | 1 - .../test_api_auth_reset_token.py | 5 +- .../test_api_authentication/test_auth_add.py | 60 + .../test_auth_reset_token.py | 34 + .../testi_auth_remove.py | 51 + .../test_apis/test_api_prefix/__init__.py | 0 .../test_api_prefix/test_prefixes_create.py} | 82 +- .../test_api_prefix/test_prefixes_modify.py | 149 + tests/test_apis/test_biocompute/__init__.py | 0 .../test_objects_drafts_create.py | 135 + .../test_objects_drafts_modify.py | 124 + .../test_objects_drafts_publish.py | 94 + tests/test_database.py | 45 - tests/test_fixtures.py | 119 - tests/test_models | 33 - 
tests/test_views/test_api_account_activate.py | 30 - .../test_views/test_api_groups_group_info.py | 89 - tests/test_views/test_api_groups_modify.py | 90 - tests/test_views/test_api_objects.py | 82 - .../test_api_objects_drafts_create.py | 111 - .../test_api_objects_drafts_modify.py | 191 - .../test_api_objects_drafts_publish.py | 111 - tests/test_views/test_api_objects_search.py | 54 - tests/test_views/test_api_objects_validate.py | 90 - tests/test_views/test_api_prefixes_token.py | 46 - tests/test_views/test_get_object_id_draft.py | 53 - tests/test_views/test_get_objectid.py | 30 - .../test_views/test_published_object_by_id.py | 26 - token.json | 1 - 168 files changed, 44484 insertions(+), 34126 deletions(-) create mode 100644 .secrets.example delete mode 100755 api/admin.py delete mode 100755 api/apps.py delete mode 100644 api/fixtures/bootstrap.json delete mode 100644 api/fixtures/metafixtures delete mode 100644 api/fixtures/metafixtures.json delete mode 100755 api/keys.sh delete mode 100644 api/migrations/0001_initial.py delete mode 100644 api/migrations/0002_auto_20220124_2356.py delete mode 100644 api/migrations/0003_rename_meta_table_prefix_table.py delete mode 100644 api/migrations/0004_rename_group_info_groupinfo.py delete mode 100644 api/migrations/0005_rename_prefixes_prefix.py delete mode 100644 api/model/groups.py delete mode 100644 api/model/prefix.py delete mode 100755 api/models.py delete mode 100644 api/permissions.py delete mode 100755 api/rdb.sh delete mode 100755 api/request_definitions/GET.schema delete mode 100755 api/request_definitions/POST.schema delete mode 100755 api/request_definitions/templates/DELETE_delete_object_by_id.schema delete mode 100755 api/request_definitions/templates/GET_activate_account.schema delete mode 100755 api/request_definitions/templates/GET_get_object_by_id.schema delete mode 100755 api/request_definitions/templates/GET_retrieve_available_schema.schema delete mode 100755 
api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema delete mode 100755 api/request_definitions/templates/POST_convert_payload_to_schema.schema delete mode 100755 api/request_definitions/templates/POST_new_account.schema delete mode 100755 api/request_definitions/templates/POST_object_listing_by_token.schema delete mode 100755 api/request_definitions/templates/POST_objects_draft.schema delete mode 100755 api/request_definitions/templates/POST_objects_publish.schema delete mode 100755 api/request_definitions/templates/POST_read_object.schema delete mode 100755 api/request_definitions/templates/POST_validate_payload_against_schema.schema delete mode 100755 api/scripts/method_specific/GET_activate_account.py delete mode 100755 api/scripts/method_specific/GET_draft_object_by_id.py delete mode 100755 api/scripts/method_specific/GET_published_object_by_id.py delete mode 100755 api/scripts/method_specific/GET_published_object_by_id_with_version.py delete mode 100755 api/scripts/method_specific/GET_retrieve_available_schema.py delete mode 100755 api/scripts/method_specific/POST_api_accounts_describe.py delete mode 100755 api/scripts/method_specific/POST_api_accounts_new.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_create.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_delete.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_modify.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_permissions.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_publish.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_read.py delete mode 100755 api/scripts/method_specific/POST_api_objects_drafts_token.py delete mode 100755 api/scripts/method_specific/POST_api_objects_publish.py delete mode 100644 
api/scripts/method_specific/POST_api_objects_published.py delete mode 100755 api/scripts/method_specific/POST_api_objects_search.py delete mode 100755 api/scripts/method_specific/POST_api_objects_token.py delete mode 100755 api/scripts/method_specific/POST_validate_payload_against_schema.py delete mode 100755 api/scripts/utilities/DbUtils.py delete mode 100755 api/scripts/utilities/FileUtils.py delete mode 100755 api/scripts/utilities/JsonUtils.py delete mode 100755 api/scripts/utilities/RequestUtils.py delete mode 100755 api/scripts/utilities/ResponseUtils.py delete mode 100755 api/scripts/utilities/SettingsUtils.py delete mode 100755 api/scripts/utilities/UserUtils.py delete mode 100755 api/serializers.py delete mode 100644 api/signals.py delete mode 100644 api/templates/api/account_activation_message.html delete mode 100755 api/urls.py delete mode 100755 api/validation_definitions/IEEE_sub/IEEE2791-2020.schema delete mode 100755 api/validation_definitions/IEEE_sub/domains/description_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/error_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/execution_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/io_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/parametric_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/provenance_domain.json delete mode 100755 api/validation_definitions/IEEE_sub/domains/usability_domain.json delete mode 100755 api/views.py delete mode 100644 bcodb/fixtures/local_data.json delete mode 100644 bcodb/fixtures/test_portal.json delete mode 100644 bcodb/settings.py delete mode 100755 bcodb/urls.py rename {api => biocompute}/__init__.py (100%) mode change 100755 => 100644 create mode 100644 biocompute/admin.py create mode 100644 biocompute/apis.py create mode 100644 biocompute/migrations/0001_initial.py rename {api => biocompute}/migrations/__init__.py (100%) create mode 
100644 biocompute/models.py create mode 100644 biocompute/selectors.py create mode 100644 biocompute/services.py create mode 100644 biocompute/urls.py rename {api/validation_definitions => config}/IEEE/2791object.json (100%) rename {api/validation_definitions => config}/IEEE/description_domain.json (100%) rename {api/validation_definitions => config}/IEEE/error_domain.json (100%) rename {api/validation_definitions => config}/IEEE/execution_domain.json (100%) rename {api/validation_definitions => config}/IEEE/io_domain.json (100%) rename {api/validation_definitions => config}/IEEE/parametric_domain.json (100%) rename {api/validation_definitions => config}/IEEE/provenance_domain.json (100%) rename {api/validation_definitions => config}/IEEE/usability_domain.json (100%) rename {api/model => config}/__init__.py (100%) mode change 100644 => 100755 rename {bcodb => config}/asgi.py (82%) create mode 100644 config/bco_scores.py create mode 100644 config/fixtures/local_data.json create mode 100644 config/fixtures/test_portal.json create mode 100644 config/services.py create mode 100644 config/settings.py create mode 100755 config/urls.py rename {bcodb => config}/wsgi.py (82%) create mode 100644 docs/bco_scores.json create mode 100644 docs/refactor.md rename {api/scripts => prefix}/__init__.py (100%) mode change 100755 => 100644 create mode 100644 prefix/admin.py create mode 100644 prefix/apis.py rename api/scripts/method_specific/__init__.py => prefix/apps.py (100%) mode change 100755 => 100644 create mode 100644 prefix/migrations/0001_initial.py rename {api/scripts/utilities => prefix/migrations}/__init__.py (100%) mode change 100755 => 100644 create mode 100644 prefix/models.py create mode 100644 prefix/selectors.py create mode 100644 prefix/services.py create mode 100644 prefix/urls.py delete mode 100644 server.conf delete mode 100644 sever.conf.example create mode 100644 tests/fixtures/bco_dump.json create mode 100644 tests/fixtures/old_test_data.json create mode 100644 
tests/fixtures/testing_bcos.py rename {bcodb => tests/test_apis}/__init__.py (100%) mode change 100755 => 100644 rename tests/{test_views => test_apis/test_api_authentication}/__init__.py (100%) create mode 100644 tests/test_apis/test_api_authentication/test_account_activate.py rename tests/{test_views/test_api_accounts_describe.py => test_apis/test_api_authentication/test_account_describe.py} (100%) rename tests/{test_views/test_api_account_new.py => test_apis/test_api_authentication/test_account_new.py} (85%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_add.py (98%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_remove.py (98%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_reset_token.py (89%) create mode 100644 tests/test_apis/test_api_authentication/test_auth_add.py create mode 100644 tests/test_apis/test_api_authentication/test_auth_reset_token.py create mode 100644 tests/test_apis/test_api_authentication/testi_auth_remove.py rename api/validation_definitions/uri_external => tests/test_apis/test_api_prefix/__init__.py (100%) mode change 100755 => 100644 rename tests/{test_views/test_api_prefixes_create.py => test_apis/test_api_prefix/test_prefixes_create.py} (67%) create mode 100644 tests/test_apis/test_api_prefix/test_prefixes_modify.py create mode 100644 tests/test_apis/test_biocompute/__init__.py create mode 100644 tests/test_apis/test_biocompute/test_objects_drafts_create.py create mode 100644 tests/test_apis/test_biocompute/test_objects_drafts_modify.py create mode 100644 tests/test_apis/test_biocompute/test_objects_drafts_publish.py delete mode 100644 tests/test_database.py delete mode 100644 tests/test_fixtures.py delete mode 100644 tests/test_models delete mode 100644 tests/test_views/test_api_account_activate.py delete mode 100644 tests/test_views/test_api_groups_group_info.py delete mode 100644 tests/test_views/test_api_groups_modify.py delete mode 100644 
tests/test_views/test_api_objects.py delete mode 100644 tests/test_views/test_api_objects_drafts_create.py delete mode 100644 tests/test_views/test_api_objects_drafts_modify.py delete mode 100644 tests/test_views/test_api_objects_drafts_publish.py delete mode 100644 tests/test_views/test_api_objects_search.py delete mode 100644 tests/test_views/test_api_objects_validate.py delete mode 100644 tests/test_views/test_api_prefixes_token.py delete mode 100644 tests/test_views/test_get_object_id_draft.py delete mode 100644 tests/test_views/test_get_objectid.py delete mode 100644 tests/test_views/test_published_object_by_id.py delete mode 100644 token.json diff --git a/.gitignore b/.gitignore index 72ed20f8..6c6c4d42 100755 --- a/.gitignore +++ b/.gitignore @@ -130,11 +130,8 @@ dmypy.json # --- USER-ADDED IGNORES --- # -# The settings file. -bcodb/settings.py - # The server configuration file. -server.conf +.secrets # The migrations folder. # bco_api/api/migrations/ @@ -143,3 +140,4 @@ server.conf static/ # JetBrains IDEs .idea/ +backups \ No newline at end of file diff --git a/.secrets.example b/.secrets.example new file mode 100644 index 00000000..906863fd --- /dev/null +++ b/.secrets.example @@ -0,0 +1,15 @@ +[DJANGO_KEYS] +SECRET_KEY= +ANON_KEY= + +[SERVER] +PRODUCTION= +DEBUG=True +ALLOWED_HOSTS= +SERVER_VERSION= +HOSTNAME= +HUMAN_READABLE_HOSTNAME= +PUBLIC_HOSTNAME= +SERVER_URL= +DATABASE= +EMAIL_BACKEND= \ No newline at end of file diff --git a/admin_only/bco_api.conf b/admin_only/bco_api.conf index f461eef1..43b6c84f 100644 --- a/admin_only/bco_api.conf +++ b/admin_only/bco_api.conf @@ -40,6 +40,25 @@ server { proxy_pass http://127.0.0.1:8080; } + # BCO API - Draft objects + #location ~* \/BCO_(\d+)\/(\d+)\.(\d+) { + location ~* ^/[a-zA-Z]+_DRAFT_(.*?) 
{ + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://unix:/var/run/bco_api.sock; + } + + # BCO API - Published objects + #location ~* \/BCO_(\d+)\/(\d+)\.(\d+) { + location ~* ^/[a-zA-Z]+_(.*?) { + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://unix:/var/run/bco_api.sock; + } location / { if ($request_method = 'OPTIONS') { @@ -51,7 +70,7 @@ server { return 204; } - root /var/www/bcoeditor/portal/build/; + root /var/www/bcoeditor/portal_userdb/client/build; try_files $uri /index.html; #try_files $uri $uri/ =404; } diff --git a/admin_only/bco_api.service b/admin_only/bco_api.service index cb4d0800..b2b46087 100644 --- a/admin_only/bco_api.service +++ b/admin_only/bco_api.service @@ -1,10 +1,10 @@ [Unit] -Description=BCO Test API gunicorn daemon +Description=BCODB API gunicorn daemon Requires=bco_api.socket After=network.target [Service] User=bco_api_user Group=nginx -WorkingDirectory=/var/www/bcoeditor/bco_api/bco_api/ -ExecStart=/var/www/bcoeditor/bco_api/env/bin/gunicorn --access-logfile /var/log/gunicorn/api_stdout.log --log-level=debug --log-file /var/log/gunicorn/api_stderr.log --workers 3 --bind unix:/var/run/bco_api.sock bco_api.wsgi:application +WorkingDirectory=/var/www/bcoeditor/bco_api/ +ExecStart=/var/www/bcoeditor/bco_api/env/bin/gunicorn --access-logfile /var/log/gunicorn/api_stdout.log --log-level=debug --log-file /var/log/gunicorn/api_stderr.log --workers 3 --bind unix:/var/run/bco_api.sock config.wsgi:application diff --git a/api/admin.py b/api/admin.py deleted file mode 100755 index 98a2e528..00000000 --- a/api/admin.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 -"""Django Admin - -Registers models for the Django Admin app 
-""" - -from django.contrib import admin -from guardian.admin import GuardedModelAdmin -from api.models import BCO, new_users -from api.model.prefix import Prefix, prefix_table -from api.model.groups import GroupInfo - -class BcoModelAdmin(admin.ModelAdmin): - search_fields = ["contents", "object_id"] -admin.site.register(BCO, BcoModelAdmin) -# admin.site.register( -admin.site.register(prefix_table) -admin.site.register(new_users) -admin.site.register(GroupInfo) -admin.site.register(Prefix) diff --git a/api/apps.py b/api/apps.py deleted file mode 100755 index f06dd842..00000000 --- a/api/apps.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -"""Run code after start-up -TODO: move things from settings.py into here. -Source: https://stackoverflow.com/a/42744626/5029459 -Source: https://docs.djangoproject.com/en/3.2/ref/applications/#django.apps.AppConfig.ready -""" - -import sys -from django.apps import AppConfig -from django.db.models.signals import post_migrate -from api.signals import populate_models - - -class ApiConfig(AppConfig): - """API Configuration""" - - default_auto_field = "django.db.models.AutoField" - name = "api" - - def ready(self): - """Create the anonymous user if they don't exist.""" - - if 'test' in sys.argv or 'loaddata' in sys.argv or 'flush' in sys.argv: - return - else: - post_migrate.connect(populate_models, sender=self) \ No newline at end of file diff --git a/api/fixtures/bootstrap.json b/api/fixtures/bootstrap.json deleted file mode 100644 index 8a3d0590..00000000 --- a/api/fixtures/bootstrap.json +++ /dev/null @@ -1,2058 +0,0 @@ -[ - { - "object_id": "https://w3id.org/biocompute/1.3.0/examples/UVP_BCO.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "spec_version": "https://w3id.org/biocompute/1.3.0/", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. 
tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "v1.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2018-10-08T18:02:33-0400" - }, - "created": "2017-11-12T12:30:48-0400", - "modified": "2018-10-08T18:35:33-0400", - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": 
"hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "xref": [ - { - "namespace": "pubmed", - "name": "PubMed", - "ids": [ - "00000" - ], - "access_time": "2018-13-02T10:15-05:00" - }, - { - "namespace": "so", - "name": "Sequence Ontology", - "ids": [ - "0000694" - ], - "access_time": "2018-13-02T10:15-05:00" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "1773" - ], - "access_time": "2018-13-02T10:15-05:00" - } - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - 
"step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions 
in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - } - }, - { - "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HIVE_metagenomics.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2000-09-26T14:43:45-0400" - }, - "created": "2018-11-29T11:29:08-0500", - "modified": "2018-11-30T11:29:08-0500", - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] 
sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "xref": [ - { - "namespace": "uberon", - "name": "Uber Anatomy Ontology", - "ids": [ - "0001988" - ], - "access_time": "2016-11-30T06:46-0500" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2016-11-30T06:46-0500" - } - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": 
"2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } 
- ], - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - } - }, - { - "object_id": 
"https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2/examples/HIVE_metagenomics.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2000-09-26T14:43:45-0400" - }, - "created": "2018-11-29T11:29:08-0500", - "modified": "2018-11-30T11:29:08-0500", - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." 
- ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "xref": [ - { - "namespace": "uberon", - "name": "Uber Anatomy Ontology", - "ids": [ - "0001988" - ], - "access_time": "2016-11-30T06:46-0500" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2016-11-30T06:46-0500" - } - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": 
"https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - } - }, - { - "object_id": "https://raw.githubusercontent.com/biocompute-objects/BCO_Specification/1.4.2glycosylation-sites-UniCarbKB", - "etag": "5741d66ddf7881db33f7075ce8b64b941bd7cc001965f31682e5da9966c7f3ba", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/", - "provenance_domain": { - "name": "glycosylation-sites-UniCarbKB", - "version": "1.0", - "review": [ - { - "status": "approved", - 
"reviewer_comment": "The dataset has passed the manual and automated QC steps and the readme has also been reviewed", - "reviewer": { - "name": "Rahi Navelkar", - "affiliation": "The George Washington University", - "email": "rsn13@gwu.edu", - "contribution": [ - "curatedBy" - ] - } - } - ], - "created": "2018-02-21T14:46:55-5:00", - "modified": "2018-10-10T11:34:02-5:00", - "contributors": [ - { - "name": "Matthew Campbell", - "affiliation": "Institute for Glycomics, Griffith University, Gold Coast, Queensland, Australia", - "email": "m.campbell2@griffith.edu.au", - "contribution": [ - "contributedBy" - ] - }, - { - "name": "Rahi Navelkar", - "affiliation": "The George Washington University", - "email": "rsn13@gwu.edu", - "contribution": [ - "curatedBy" - ] - }, - { - "name": "Robel Kahsay", - "affiliation": "The George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy" - ] - } - ], - "license": "https://creativecommons.org/licenses/by/4.0/" - }, - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128]" - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/license/license_extension.json", - "license_extension": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "scripts_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - } - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/GW-HIVE/glygen-backend-integration/", - "scm_type": "git", - "scm_commit": "d34b85553e775dd5452005d786fe6e47d6048ee0", - "scm_path": 
"/data/projects/glygen/generated/datasets/reviewed/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.readme.txt" - } - } - ], - "description_domain": { - "keywords": [ - "protein", - "canonical", - "glycosylation", - "glycan" - ], - "xref": [ - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2018-21-02T14:46:55-5:00" - } - ], - "platform": [ - "centos7" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "ac2canonical.py", - "description": "Python script for mapping the UniProtKB accessions in the input file to the UniProtKB canonical accessions ", - "version": "", - "input_list": [ - { - "uri": "/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" - } - ], - "output_list": [ - { - "uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" - } - ] - }, - { - "step_number": 2, - "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py", - "description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ", - "input_list": [ - { - "uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" - } - ], - "output_list": [ - { - "uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv" - } - ] - }, - { - "step_number": 2, - "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py", - "description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ", - "input_list": [ - { - "uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt" - } - ], - "output_list": [ - { - "uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv" - } - ] - }, - { - "step_number": 3, - "name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py", - "description": "Python script for quality check of the processed file. 
Records which fall under one or more following criteria's are flagged and eliminated and can be accessed using the log file. The elimination steps include - a. If the protein accession is not included in UniProtKB protein list - UniProtKB Nov-2017 Release b. If the amino acid position does not match to the amino acid on the associated position on fasta sequence - UniProtKB Nov-2017 Release c. If the id (UnicarbKB structure id) is not present in input file d. If the glycosylation type (linkage type) is not retrieved through step 3 e. If a serine or threonine is reported for an N-linked glycan structure f. If an asparagine is reported for an O-linked glycan structure", - "input_list": [ - { - "uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv" - }, - { - "uri": "human_protein_all.fasta" - } - ], - "output_list": [ - { - "uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv" - }, - { - "uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/ac2canonical.py" - } - }, - { - "uri": { - "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2a.py" - } - }, - { - "uri": { - "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py" - } - }, - { - "uri": { - "uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py" - } - } - ], - "script_driver": "manual", - "software_prerequisites": [ - { - "name": "Python", - "version": "2.7.13", - "uri": { - "uri": "https://www.python.org/downloads/release/python-2713/", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": 
"17add4bf0ad0ec2f08e0cae6d205c700" - } - } - ], - "external_data_endpoints": [ - { - "name": "UniCarbKB", - "url": "http://www.unicarbkb.org/" - }, - { - "name": "access glygen-backend-integration", - "url": "https://github.com/glygener/glygen-backend-integration" - } - ], - "environment_variables": { - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt", - "uri": "http://data.glygen.org/datasets/source/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt", - "access_time": "2018-10-10T11:34:02-5:00" - } - }, - { - "uri": { - "filename": "human_protein_all.fasta", - "uri": "http://data.glygen.org/GLYDS00053", - "access_time": "2018-10-10T11:34:02-5:00" - } - } - ], - "output_subdomain": [ - { - "mediatype": "csv/text", - "uri": { - "filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log", - "uri": "http://data.glygen.org/datasets/logs/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log", - "access_time": "2018-10-10T11:37:02-5:00" - } - }, - { - "mediatype": "csv/text", - "uri": { - "filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv", - "uri": "http://data.glygen.org/GLYDS00040", - "access_time": "2018-10-10T11:37:02-5:00" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "comment": "Unique value statistics for the dataset", - "statistics": [ - { - "key": "uniprotkb_canonical_ac", - "value": 92, - "description": "Accession assigned to the protein isoform chosen to be the canonical sequence in UniProtKB database" - }, - { - "key": "glycosylation_site", - "value": 223, - "description": "Site on the protein sequence where glycosylation is observed" - }, - { - "key": "evidence", - "value": 163, - "description": "NCBI PubMed Id (PMID) as evidence for the entry" - }, - { - "key": "unicarbkb_id", - "value": 984, - "description": "UnicarbKB data structure identifier" - }, - { - "key": 
"glytoucan_ac", - "value": 824, - "description": "Unique accession assigned to the registered glycan structure in GlyTouCan database" - }, - { - "key": "amino_acid", - "value": 3, - "description": "Three letter code abbreviation of the amino acid" - }, - { - "key": "glycosylation_type", - "value": 3, - "description": "Type of glycosylation linkage type" - } - ] - }, - "algorithmic_error": { - } - } - } -] \ No newline at end of file diff --git a/api/fixtures/metafixtures b/api/fixtures/metafixtures deleted file mode 100644 index d66ca52f..00000000 --- a/api/fixtures/metafixtures +++ /dev/null @@ -1,58 +0,0 @@ -[ - { - "model": "api.bco_draft_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.bco_publish_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.galaxy_draft_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.galaxy_publish_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.glygen_draft_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.glygen_publish_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.oncomx_draft_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - }, - { - "model": "api.oncomx_publish_meta", - "pk": 1, - "fields": { - "n_objects": "1" - } - } -] diff --git a/api/fixtures/metafixtures.json b/api/fixtures/metafixtures.json deleted file mode 100644 index d222310b..00000000 --- a/api/fixtures/metafixtures.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "model": "api.prefix_table", - "pk": 1, - "fields": { - "prefix": "BCO", - "n_objects": "1" - } - } -] diff --git a/api/keys.sh b/api/keys.sh deleted file mode 100755 index ed33c02b..00000000 --- a/api/keys.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/bash - -cd .. - -echo " " -echo " " -echo "Anon key for the installation is..." 
-sqlite3 db.sqlite3 'SELECT B.key FROM auth_user AS A JOIN authtoken_token AS B ON A.id = B.user_id WHERE A.username = "anon";' -echo " " -echo " " -echo "Wheel key for the installation is..." -sqlite3 db.sqlite3 'SELECT B.key FROM auth_user AS A JOIN authtoken_token AS B ON A.id = B.user_id WHERE A.username = "wheel";' -echo " " -echo " " - -cd api \ No newline at end of file diff --git a/api/migrations/0001_initial.py b/api/migrations/0001_initial.py deleted file mode 100644 index 538b352a..00000000 --- a/api/migrations/0001_initial.py +++ /dev/null @@ -1,159 +0,0 @@ -# Generated by Django 3.2 on 2021-10-01 12:33 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion -import django.utils.timezone - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ("auth", "0012_alter_user_first_name_max_length"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="meta_table", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("n_objects", models.IntegerField()), - ("prefix", models.CharField(max_length=5)), - ], - ), - migrations.CreateModel( - name="new_users", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("email", models.EmailField(max_length=254)), - ("temp_identifier", models.TextField(max_length=100)), - ("token", models.TextField(blank=True, null=True)), - ("hostname", models.TextField(blank=True, null=True)), - ("created", models.DateTimeField(default=django.utils.timezone.now)), - ], - ), - migrations.CreateModel( - name="prefixes", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("prefix", models.CharField(max_length=5)), - ( - "owner_group", - 
models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="auth.group", - to_field="name", - ), - ), - ( - "owner_user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to=settings.AUTH_USER_MODEL, - to_field="username", - ), - ), - ], - ), - migrations.CreateModel( - name="group_info", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "delete_members_on_group_deletion", - models.BooleanField(default=False), - ), - ("description", models.TextField()), - ("expiration", models.DateTimeField(blank=True, null=True)), - ("max_n_members", models.IntegerField(blank=True, null=True)), - ( - "group", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="auth.group", - to_field="name", - ), - ), - ( - "owner_user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to=settings.AUTH_USER_MODEL, - to_field="username", - ), - ), - ], - ), - migrations.CreateModel( - name="bco", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("contents", models.JSONField()), - ("object_class", models.TextField(blank=True, null=True)), - ("object_id", models.TextField()), - ("prefix", models.CharField(max_length=5)), - ("schema", models.TextField()), - ("state", models.TextField()), - ("last_update", models.DateTimeField()), - ( - "owner_group", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="auth.group", - to_field="name", - ), - ), - ( - "owner_user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to=settings.AUTH_USER_MODEL, - to_field="username", - ), - ), - ], - ), - ] diff --git a/api/migrations/0002_auto_20220124_2356.py b/api/migrations/0002_auto_20220124_2356.py deleted file mode 100644 index 778906d1..00000000 --- a/api/migrations/0002_auto_20220124_2356.py +++ /dev/null @@ -1,60 +0,0 @@ -# 
Generated by Django 3.2.10 on 2022-01-24 23:56 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion -import django.utils.timezone - - -class Migration(migrations.Migration): - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("api", "0001_initial"), - ] - - operations = [ - migrations.AddField( - model_name="prefixes", - name="certifying_key", - field=models.TextField(blank=True, null=True), - ), - migrations.AddField( - model_name="prefixes", - name="certifying_server", - field=models.TextField(blank=True, null=True), - ), - migrations.AddField( - model_name="prefixes", - name="created", - field=models.DateTimeField( - blank=True, default=django.utils.timezone.now, null=True - ), - ), - migrations.AddField( - model_name="prefixes", - name="created_by", - field=models.ForeignKey( - default="wheel", - on_delete=django.db.models.deletion.CASCADE, - related_name="created_by", - to=settings.AUTH_USER_MODEL, - to_field="username", - ), - ), - migrations.AddField( - model_name="prefixes", - name="description", - field=models.TextField(blank=True, null=True), - ), - migrations.AddField( - model_name="prefixes", - name="expires", - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AlterField( - model_name="group_info", - name="description", - field=models.TextField(blank=True), - ), - ] diff --git a/api/migrations/0003_rename_meta_table_prefix_table.py b/api/migrations/0003_rename_meta_table_prefix_table.py deleted file mode 100644 index 697f102d..00000000 --- a/api/migrations/0003_rename_meta_table_prefix_table.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 3.2.10 on 2022-01-25 00:14 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ("api", "0002_auto_20220124_2356"), - ] - - operations = [ - migrations.RenameModel( - old_name="meta_table", - new_name="prefix_table", - ), - ] diff --git 
a/api/migrations/0004_rename_group_info_groupinfo.py b/api/migrations/0004_rename_group_info_groupinfo.py deleted file mode 100644 index 94c31c1f..00000000 --- a/api/migrations/0004_rename_group_info_groupinfo.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 3.2.10 on 2022-03-22 17:57 - -from django.conf import settings -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("auth", "0012_alter_user_first_name_max_length"), - ("api", "0003_rename_meta_table_prefix_table"), - ] - - operations = [ - migrations.RenameModel( - old_name="group_info", - new_name="GroupInfo", - ), - ] diff --git a/api/migrations/0005_rename_prefixes_prefix.py b/api/migrations/0005_rename_prefixes_prefix.py deleted file mode 100644 index d253bdfc..00000000 --- a/api/migrations/0005_rename_prefixes_prefix.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 3.2.10 on 2022-03-22 18:29 - -from django.conf import settings -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("auth", "0012_alter_user_first_name_max_length"), - ("api", "0004_rename_group_info_groupinfo"), - ] - - operations = [ - migrations.RenameModel( - old_name="prefixes", - new_name="Prefix", - ), - ] diff --git a/api/model/groups.py b/api/model/groups.py deleted file mode 100644 index 2bda7bdf..00000000 --- a/api/model/groups.py +++ /dev/null @@ -1,466 +0,0 @@ -#!/usr/bin/env python3 -"""Functions for operations with groups -""" - -import sys -from django.db import models -from django.db.models.signals import post_save -from django.contrib.auth.models import Group, User -from django.dispatch import receiver -from rest_framework import status -from rest_framework.response import Response - -from api.scripts.utilities.DbUtils import DbUtils -from api.scripts.utilities.UserUtils import UserUtils -from 
api.models import BCO - -usr_utils = UserUtils() -db_utils = DbUtils() - - -class GroupInfo(models.Model): - """Some additional information for Group. - This information is stored separately from - Group so as to not complicate or compromise - anything relating to authentication. - Delete group members on group deletion? - """ - - delete_members_on_group_deletion = models.BooleanField(default=False) - description = models.TextField(blank=True) - expiration = models.DateTimeField(blank=True, null=True) - group = models.ForeignKey(Group, on_delete=models.CASCADE, to_field="name") - max_n_members = models.IntegerField(blank=True, null=True) - owner_user = models.ForeignKey(User, on_delete=models.CASCADE, to_field="username") - - def __str__(self): - """String for representing the GroupInfo model (in Admin site etc.).""" - return f"{self.group}" - - -def post_api_groups_info(request): - """Retrieve Group information by user""" - - user = usr_utils.user_from_request(request=request) - - try: - bulk_request = request.data["POST_api_groups_info"] - - group_info = [] - - for index, value in enumerate(bulk_request["names"]): - group = Group.objects.get(name=value) - - try: - admin = GroupInfo.objects.get(group=value).owner_user == user - description = GroupInfo.objects.get(group=value).description - except GroupInfo.DoesNotExist: - admin = False - description = "N/A" - - group_permissions = list( - group.permissions.all().values_list("codename", flat=True) - ) - group_members = list(group.user_set.all().values_list("username", flat=True)) - group_info.append( - { - "name": group.name, - "permissions": group_permissions, - "members": group_members, - "admin": admin, - "description": description, - } - ) - except Exception as error: - return Response( - status=status.HTTP_400_BAD_REQUEST, - data={"message": "Bad request. 
Request is not formatted correctly."} - ) - - return Response(status=status.HTTP_200_OK, data=group_info) - - -def post_api_groups_create(request): - """ - Instantiate any necessary imports. - Not guaranteed which of username and group - will be provided. - Create the optional keys if they haven't - been provided. - The group has not been created, so create it. - Update the group info. - TODO: Expiration needs to be casted to a datetime object; will likely - need to be separate fields in UI - The expiration field can't be a blank string because django will complain - about the field being a DateTimeField and thus requiring a particular - format for "blank" or "null" as defined in the model. - - Note the bool typecast for delete_members_on_group_deletion, - this is necessary since the request to create the group - doesn't have a concept of type bool. - Add users which exist and give an error for those that don't. - - As this view is for a bulk operation, status 200 - means that the request was successfully processed, - but NOT necessarily each item in the request. 
- """ - - bulk_request = request.data["POST_api_groups_create"] - group_admin = usr_utils.user_from_request(request=request) - groups = list(Group.objects.all().values_list("name", flat=True)) - return_data = [] - any_failed = False - - for creation_object in bulk_request: - - standardized = creation_object["name"].lower() - if standardized not in groups: - if "usernames" not in creation_object: - creation_object["usernames"] = [] - if "delete_members_on_group_deletion" not in creation_object: - creation_object["delete_members_on_group_deletion"] = False - - if "description" not in creation_object: - creation_object["description"] = "" - - if "max_n_members" not in creation_object: - creation_object["max_n_members"] = -1 - - Group.objects.create(name=creation_object["name"]) - group_admin.groups.add(Group.objects.get(name=creation_object["name"])) - - if ( - "expiration" not in creation_object - or creation_object["expiration"] == "" - ): - GroupInfo.objects.create( - delete_members_on_group_deletion=bool( - creation_object["delete_members_on_group_deletion"] - ), - description=creation_object["description"], - group=Group.objects.get(name=creation_object["name"]), - max_n_members=creation_object["max_n_members"], - owner_user=group_admin, - ) - else: - GroupInfo.objects.create( - delete_members_on_group_deletion=bool( - creation_object["delete_members_on_group_deletion"] - ), - description=creation_object["description"], - expiration=creation_object["expiration"], - group=Group.objects.get(name=creation_object["name"]), - max_n_members=creation_object["max_n_members"], - owner_user=group_admin, - ) - - users_added = [] - users_excluded = [] - - for usrnm in creation_object["usernames"]: - if usr_utils.check_user_exists(user_name=usrnm): - User.objects.get(username=usrnm).groups.add( - Group.objects.get(name=creation_object["name"]) - ) - users_added.append(usrnm) - else: - users_excluded.append(usrnm) - - if len(users_excluded) > 0: - return_data.append( - 
db_utils.messages( - parameters={ - "group": standardized, - "users_excluded": users_excluded, - } - )["201_group_users_excluded"] - ) - - else: - return_data.append( - db_utils.messages(parameters={"group": standardized})[ - "201_group_create" - ] - ) - - else: - # Update the request status. - return_data.append( - db_utils.messages(parameters={"group": standardized})[ - "409_group_conflict" - ] - ) - any_failed = True - - if any_failed: - return Response(status=status.HTTP_207_MULTI_STATUS, data=return_data) - - return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_groups_delete(request): - """Instantiate any necessary imports.""" - - bulk_request = request.data["POST_api_groups_delete"]["names"] - - # Establish who has made the request. - requestor_info = usr_utils.user_from_request(request=request) - - # Get all group names. - - # This is a better solution than querying for - # each individual group name. - groups = list(Group.objects.all().values_list("name", flat=True)) - - # Construct an array to return information about processing - # the request. - return_data = [] - any_failed = False - - # Since bulk_request is an array, go over each - # item in the array. - for deletion_object in bulk_request: - # Standardize the group name. - standardized = deletion_object.lower() - deleted_count = 0 - if standardized in groups: - # Get the group and its information. - grouped = Group.objects.get(name=standardized) - group_information = GroupInfo.objects.get(group=grouped.name) - - # Check that the requestor is the group admin. - if requestor_info.username == group_information.owner_user.username: - # Delete the group, checking to see if all users - # in the group also get deleted. - if group_information.delete_members_on_group_deletion: - # Delete all members of the group. - User.objects.filter(groups__name=grouped.name).delete() - # Delete the group itself. 
- deleted_count, deleted_info = grouped.delete() - if deleted_count < 2: - # Too few deleted, error with this delete - return_data.append( - db_utils.messages(parameters={"group": grouped.name})[ - "404_missing_bulk_parameters" - ] - ) - any_failed = True - continue - - elif deleted_count > 2: - print(deleted_count, "deleted_count") - # We don't expect there to be duplicates, so while this was successful it should throw a warning - return_data.append( - db_utils.messages(parameters={"group": grouped.name})[ - "418_too_many_deleted" - ] - ) - any_failed = True - continue - # Everything looks OK - return_data.append( - db_utils.messages(parameters={"group": grouped.name})[ - "200_OK_group_delete" - ] - ) - else: - # Requestor is not the admin. - return_data.append( - db_utils.messages(parameters={})["403_insufficient_permissions"] - ) - any_failed = True - else: - # Update the request status. - return_data.append(db_utils.messages(parameters={})["400_bad_request"]) - any_failed = True - - if any_failed: - return Response(status=status.HTTP_207_MULTI_STATUS, data=return_data) - - return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_groups_modify(request): - """Instantiate any necessary imports. - TODO: This needs a serious revamp... Permissions and specific groups need - to be adjusted. IE no one should be able to change a group without GroupInfo. 
- """ - try: - bulk_request = request.data["POST_api_groups_modify"] - except: - return Response(status=status.HTTP_400_BAD_REQUEST) - requestor_info = usr_utils.user_from_request(request=request) - groups = list(Group.objects.all().values_list("name", flat=True)) - return_data = [] - for modification_object in bulk_request: - standardized = modification_object["name"].lower() - - if standardized in groups: - grouped = Group.objects.get(name=standardized) - if ( - requestor_info.is_superuser == True - or grouped in requestor_info.groups.all() - ): - # TODO: We shouldn't use a try/except as an if statement; I think there is actually - # a get_or_create() function: - # group_information = GroupInfo.objects.get_or_create(group=grouped, owner_user=requestor_info) - # But would need to be tested - try: - group_information = GroupInfo.objects.get(group=grouped) - except: - group_information = GroupInfo.objects.create( - group=grouped, owner_user=requestor_info - ) - if "actions" in modification_object: - action_set = modification_object["actions"] - - # Invalid inputs don't throw 400, 401, or 403 for the - # request. That is, provided parameters that don't - # exist (for example, an owner_user that does not exist) - # simply get skipped over. - # First do the "easy" tasks - name and description. - # Change name of group if set in actions - if "rename" in action_set: - # Simply re-name to whatever we've been provided, - # assuming the group doesn't already exist. - if action_set["rename"] not in groups: - grouped.name = action_set["rename"] - grouped.save() - group_information.group = grouped - group_information.save() - bco_list = BCO.objects.filter(owner_group=standardized) - for bco in bco_list: - bco.owner_group = grouped - bco.save() - - # Change description of group if set in actions. - if "redescribe" in action_set: - group_information.description = action_set["redescribe"] - group_information.save() - - # Now the ownership tasks. 
- # TODO: Is owner_group defined for this type of object? - # Does not appear to be set, also does not appear to be inherited. - # WARNING: This could cause an error if this is sent in! - if "owner_group" in action_set: - # Make sure the provided owner group exists. - if usr_utils.check_group_exists(name=action_set["owner_group"]): - group_information.owner_group = Group.objects.get( - name=action_set["owner_group"] - ) - group_information.save() - else: - # TODO: This seems to be some type of error state - pass - - if "owner_user" in action_set: - # Make sure the provided owner user exists. - if usr_utils.check_user_exists( - user_name=action_set["owner_user"] - ): - group_information.owner_user = User.objects.get( - username=action_set["owner_user"] - ) - group_information.save() - else: - # TODO: This seems to be some type of error state - pass - - # Finally, perform the set logic to add and remove - # users and groups. - - # Get all users in the group. - all_users = set([i.username for i in list(grouped.user_set.all())]) - - # Removals are processed first, then additions. - # Remove the users provided, if any. - if "remove_users" in action_set: - users = User.objects.filter( - username__in=action_set["remove_users"] - ) - for user in users: - user.groups.remove(grouped) - - # Get the users in the groups provided, if any. - if "disinherit_from" in action_set: - # Get all the groups first, then get the user list. - rm_group_users = list( - User.objects.filter( - groups__in=Group.objects.filter( - name__in=action_set["disinherit_from"] - ) - ).values_list("username", flat=True) - ) - - all_users = all_users - set(rm_group_users) - - # Addition explained at https://stackoverflow.com/a/1306663 - - # Add the users provided, if any. - if "add_users" in action_set: - users = User.objects.filter( - username__in=action_set["add_users"] - ) - for user in users: - user.groups.add(grouped) - - # Get the users in the groups provided, if any. 
- if "inherit_from" in action_set: - # Get all the groups first, then get the user list. - a_group_users = list( - User.objects.filter( - groups__in=Group.objects.filter( - name__in=action_set["inherit_from"] - ) - ).values_list("username", flat=True) - ) - all_users.update(a_group_users) - else: - pass - return_data.append( - db_utils.messages(parameters={"group": grouped.name})[ - "200_OK_group_modify" - ] - ) - else: - # Requestor is not the admin. - return_data.append( - db_utils.messages(parameters={})["403_insufficient_permissions"] - ) - else: - # Update the request status. - return_data.append(db_utils.messages(parameters={})["400_bad_request"]) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - return Response(status=status.HTTP_200_OK, data=return_data) - - -@receiver(post_save, sender=User) -def associate_user_group(sender, instance, created, **kwargs): - """Create Group and GroupInfo - - Link user creation to groups. - Create a group for this user. - Source: https://stackoverflow.com/a/55206382/5029459 - Automatically add the user to the BCO drafters and publishers groups, - if the user isn't anon or the already existent bco_drafter or bco_publisher. 
- """ - - if 'test' in sys.argv or 'loaddata' in sys.argv: - return - - else: - if created: - print(instance) - Group.objects.create(name=instance) - group = Group.objects.get(name=instance) - group.user_set.add(instance) - if instance.username not in ["anon", "bco_drafter", "bco_publisher", "AnonymousUser"]: - User.objects.get(username=instance).groups.add( - Group.objects.get(name="bco_drafter") - ) - User.objects.get(username=instance).groups.add( - Group.objects.get(name="bco_publisher") - ) diff --git a/api/model/prefix.py b/api/model/prefix.py deleted file mode 100644 index 496dc922..00000000 --- a/api/model/prefix.py +++ /dev/null @@ -1,745 +0,0 @@ -#!/usr/bin/env python3 -"""Functions for operations with groups -""" - - -import re -import sys -from django.db import models -from django.contrib.auth.models import Group, Permission, User -from django.db.models.signals import post_save, post_delete, pre_save -from django.dispatch import receiver -import django.db.utils as PermErrors -from django.contrib.contenttypes.models import ContentType -from django.utils import timezone -from rest_framework import status -from rest_framework.response import Response - -from api.model.groups import GroupInfo -from api.scripts.utilities import DbUtils -from api.scripts.utilities import UserUtils - - -# Generic meta data model -# TODO: rename to prefix_meta -class prefix_table(models.Model): - """The number of objects for a given prefix.""" - - # Field is required. - n_objects = models.IntegerField() - - # Which prefix the object falls under. - - # Field is required. - prefix = models.CharField(max_length=5) - - def __str__(self): - """String for representing the BCO model (in Admin site etc.).""" - return self.prefix - - -class Prefix(models.Model): - """Link Prefix to groups and users. - - Be careful about related_name. 
- Source: https://stackoverflow.com/questions/53651114/using-same-foreign-key-twice-in-a-model-in-django-as-different-fields - Which server is this prefix certified with? - What is the certifying key? - """ - - certifying_server = models.TextField(blank=True, null=True) - certifying_key = models.TextField(blank=True, null=True) - created = models.DateTimeField(default=timezone.now, blank=True, null=True) - created_by = models.ForeignKey( - User, - on_delete=models.CASCADE, - related_name="created_by", - to_field="username", - default="wheel", - ) - description = models.TextField(blank=True, null=True) - expires = models.DateTimeField(blank=True, null=True) - owner_group = models.ForeignKey(Group, on_delete=models.CASCADE, to_field="name") - owner_user = models.ForeignKey(User, on_delete=models.CASCADE, to_field="username") - prefix = models.CharField(max_length=5) - - def __str__(self): - """String for representing the BCO model (in Admin site etc.).""" - return f"{self.prefix}" - - -def post_api_prefixes_create(request): - """Create a prefix - - Create a prefix to be used to classify BCOs and to determine permissions - for objects created under that prefix. The requestor must be in the group - prefix_admins to create a prefix. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. 
- """ - - db_utils = DbUtils.DbUtils() - user_utils = UserUtils.UserUtils() - bulk_request = request.data["POST_api_prefixes_create"] - unavailable = list(Prefix.objects.all().values_list("prefix", flat=True)) - return_data = [] - any_failed = False - for creation_object in bulk_request: - try: - owner_user = User.objects.get(username=creation_object["owner_user"]) - except User.DoesNotExist: - return_data.append( - db_utils.messages(parameters={"username": creation_object["owner_user"]})[ - "404_user_not_found" - ] - ) - any_failed = True - continue - if creation_object["owner_group"] == "bco_drafter": - is_public = True - else: - is_public = False - for prfx in creation_object["prefixes"]: - standardized = prfx["prefix"].upper() - if not re.match(r"^[A-Z]{3,5}$", standardized): - return_data.append( - db_utils.messages(parameters={"prefix": standardized})[ - "400_bad_request_malformed_prefix" - ] - ) - any_failed = True - continue - - if standardized in unavailable: - return_data.append( - db_utils.messages(parameters={"prefix": standardized})[ - "409_prefix_conflict" - ] - ) - any_failed = True - continue - - if "expiration_date" in prfx: - if ( - db_utils.check_expiration(dt_string=prfx["expiration_date"]) - is not None - ): - return_data.append( - db_utils.messages( - parameters={"expiration_date": prfx["expiration_date"]} - )["400_invalid_expiration_date"] - ) - any_failed = True - continue - - draft = prfx["prefix"].lower() + "_drafter" - publish = prfx["prefix"].lower() + "_publisher" - - if len(Group.objects.filter(name=draft)) != 0: - drafters = Group.objects.get(name=draft) - owner_user.groups.add(drafters) - else: - Group.objects.create(name=draft) - drafters = Group.objects.get(name=draft) - owner_user.groups.add(drafters) - GroupInfo.objects.create( - delete_members_on_group_deletion=False, - description=prfx["description"], - group=drafters, - max_n_members=-1, - owner_user=owner_user, - ) - - if len(Group.objects.filter(name=publish)) != 0: - 
publishers = Group.objects.get(name=publish) - owner_user.groups.add(publishers) - else: - Group.objects.create(name=publish) - publishers = Group.objects.get(name=publish) - owner_user.groups.add(publishers) - GroupInfo.objects.create( - delete_members_on_group_deletion=False, - description=prfx["description"], - group=publishers, - max_n_members=-1, - owner_user=owner_user, - ) - if is_public is True: - owner_group = "bco_drafter" - else: - owner_group = publish - - write_result = DbUtils.DbUtils().write_object( - p_app_label="api", - p_model_name="Prefix", - p_fields=[ - "created_by", - "description", - "owner_group", - "owner_user", - "prefix", - ], - p_data={ - "created_by": user_utils.user_from_request( - request=request - ).username, - "description": prfx["description"], - "owner_group": owner_group, - "owner_user": creation_object["owner_user"], - "prefix": standardized, - }, - ) - if write_result != 1: - return_data.append( - db_utils.messages(parameters={"prefix": standardized})[ - "409_prefix_conflict" - ] - ) - any_failed = True - continue - - return_data.append( - db_utils.messages(parameters={"prefix": standardized})[ - "201_prefix_create" - ] - ) - - if any_failed and len(return_data) == 1: - return Response(status=return_data[0]["status_code"], data=return_data) - - if any_failed and len(return_data) > 1: - return Response(status=status.HTTP_207_MULTI_STATUS, data=return_data) - - return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_prefixes_delete(request): - """Deletes a prefix - - The requestor must be in the group prefix_admins to delete a prefix. - Any object created under this prefix will have its permissions "locked out." - This means that any other view which relies on object-level permissions, such - as /api/objects/drafts/read/, will not allow any requestor access to particular - objects. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. 
- - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. - """ - - db_utils = DbUtils.DbUtils() - - bulk_request = request.data["POST_api_prefixes_delete"] - - # Get all existing prefixes. - unavailable = list(Prefix.objects.all().values_list("prefix", flat=True)) - - return_data = [] - - for creation_object in bulk_request: - - # Create a list to hold information about errors. - errors = {} - - # Standardize the prefix name. - standardized = creation_object.upper() - - # Create a flag for if one of these checks fails. - error_check = False - - if standardized not in unavailable: - error_check = True - # Update the request status. - errors["404_missing_prefix"] = db_utils.messages( - parameters={"prefix": standardized} - )["404_missing_prefix"] - - if error_check is False: - # The prefix exists, so delete it. - # No need to use DB Utils here, - # just delete straight up. - # Source: https://stackoverflow.com/a/3681691 - # Django *DOESN'T* want primary keys now... - prefixed = Prefix.objects.get(prefix=standardized) - prefixed.delete() - # Deleted the prefix. - errors["200_OK_prefix_delete"] = db_utils.messages( - parameters={"prefix": standardized} - )["200_OK_prefix_delete"] - - # Append the possible "errors". - return_data.append(errors) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_prefixes_modify(request): - """Modify a Prefix - - Modify a prefix which already exists. - The requestor *must* be in the group prefix_admins to modify a prefix. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. 
- """ - # Instantiate any necessary imports. - db_utils = DbUtils.DbUtils() - user_utils = UserUtils.UserUtils() - - bulk_request = request.data["POST_api_prefixes_modify"] - unavailable = list(Prefix.objects.all().values_list("prefix", flat=True)) - - # Construct an array to return information about processing - # the request. - return_data = [] - - # Since bulk_request is an array, go over each - # item in the array. - for creation_object in bulk_request: - - # Go over each prefix proposed. - for prfx in creation_object["prefixes"]: - - # Create a list to hold information about errors. - errors = {} - - # Standardize the prefix name. - standardized = prfx["prefix"].upper() - - # Create a flag for if one of these checks fails. - error_check = False - - if standardized not in unavailable: - - error_check = True - - # Update the request status. - # Bad request. - errors["404_missing_prefix"] = db_utils.messages( - parameters={"prefix": standardized} - )["404_missing_prefix"] - - # Does the user exist? - if ( - user_utils.check_user_exists(user_name=creation_object["owner_user"]) - is False - ): - - error_check = True - - # Bad request. - errors["404_user_not_found"] = db_utils.messages( - parameters={"username": creation_object["owner_user"]} - )["404_user_not_found"] - - # Does the group exist? - if ( - user_utils.check_group_exists(name=creation_object["owner_group"]) - is False - ): - - error_check = True - - # Bad request. - errors["404_group_not_found"] = db_utils.messages( - parameters={"group": creation_object["owner_group"]} - )["404_group_not_found"] - - # Was the expiration date validly formatted and, if so, - # is it after right now? - if "expiration_date" in prfx: - if ( - db_utils.check_expiration(dt_string=prfx["expiration_date"]) - is not None - ): - - error_check = True - - # Bad request. 
- errors["400_invalid_expiration_date"] = db_utils.messages( - parameters={"expiration_date": prfx["expiration_date"]} - )["400_invalid_expiration_date"] - - # Did any check fail? - if error_check is False: - - # The prefix has not been created, so create it. - DbUtils.DbUtils().write_object( - p_app_label="api", - p_model_name="Prefix", - p_fields=[ - "created_by", - "description", - "owner_group", - "owner_user", - "prefix", - ], - p_data={ - "created_by": user_utils.user_from_request( - request=request - ).username, - "description": prfx["description"], - "owner_group": creation_object["owner_group"], - "owner_user": creation_object["owner_user"], - "prefix": standardized, - }, - ) - - # Created the prefix. - errors["201_prefix_modify"] = db_utils.messages( - parameters={"prefix": standardized} - )["201_prefix_modify"] - - # Append the possible "errors". - return_data.append(errors) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_prefixes_permissions_set(request): - """Set the permissions for prefixes.""" - - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - uu = UserUtils.UserUtils() - - # First, get which user we're dealing with. - user = uu.user_from_request(request=request) - - # Define the bulk request. - bulk_request = request.data["POST_api_prefixes_permissions_set"] - - # Get all existing prefixes. - unavailable = list(Prefix.objects.all().values_list("prefix", flat=True)) - - # Construct an array to return information about processing - # the request. - return_data = [] - - # Since bulk_request is an array, go over each - # item in the array. - for creation_object in bulk_request: - - # Go over each prefix proposed. - for prfx in creation_object["prefixes"]: - - # Create a list to hold information about errors. 
- errors = {} - - # Standardize the prefix name. - standardized = prfx.upper() - - # Create a flag for if one of these checks fails. - error_check = False - - # Has the prefix already been created? - if standardized not in unavailable: - - error_check = True - - # Update the request status. - errors["404_missing_prefix"] = db.messages( - parameters={"prefix": standardized} - )["404_missing_prefix"] - - # The prefix exists, but is the requestor the owner? - if ( - uu.check_user_owns_prefix(user_name=user.username, prfx=standardized) - is False - and user.username != "wheel" - ): - - error_check = True - - # Bad request, the user isn't the owner or wheel. - errors["403_requestor_is_not_prefix_owner"] = db.messages( - parameters={"prefix": standardized} - )["403_requestor_is_not_prefix_owner"] - - # The "expensive" work of assigning permissions is held off - # if any of the above checks fails. - - # Did any check fail? - if error_check is False: - - # Split out the permissions assignees into users and groups. - assignees = {"group": [], "username": []} - - if "username" in creation_object: - assignees["username"] = creation_object["username"] - - if "group" in creation_object: - assignees["group"] = creation_object["group"] - - # Go through each one. - for user_name in assignees["username"]: - - # Create a list to hold information about sub-errors. - sub_errors = {} - - # Create a flag for if one of these sub-checks fails. - sub_error_check = False - - # Get the user whose permissions are being assigned. - if uu.check_user_exists(user_name=user_name) is False: - - sub_error_check = True - - # Bad request, the user doesn't exist. - sub_errors["404_user_not_found"] = db.messages( - parameters={"username": user_name} - )["404_user_not_found"] - - # Was the user found? - if sub_error_check is False: - - assignee = User.objects.get(username=user_name) - - # Permissions are defined directly as they are - # in the POST request. - - # Assumes permissions are well-formed... 
- - # Source: https://docs.djangoproject.com/en/3.2/topics/auth/default/#permissions-and-authorization - assignee.user_permissions.set( - [ - Permission.objects.get(codename=i + "_" + prfx) - for i in creation_object["permissions"] - ] - ) - - # Permissions assigned. - sub_errors["200_OK_prefix_permissions_update"] = db.messages( - parameters={"prefix": standardized} - )["200_OK_prefix_permissions_update"] - - # Add the sub-"errors". - errors["username"] = sub_errors - - for g in assignees["group"]: - - # Create a list to hold information about sub-errors. - sub_errors = {} - - # Create a flag for if one of these sub-checks fails. - sub_error_check = False - - # Get the group whose permissions are being assigned. - if uu.check_group_exists(name=g) is False: - - sub_error_check = True - - # Bad request, the group doesn't exist. - sub_errors["404_group_not_found"] = db.messages( - parameters={"group": g} - )["404_group_not_found"] - - # Was the group found? - if sub_error_check is False: - - assignee = Group.objects.get(name=g) - - # Permissions are defined directly as they are - # in the POST request. - - # Assumes permissions are well-formed... - - # Source: https://docs.djangoproject.com/en/3.2/topics/auth/default/#permissions-and-authorization - assignee.permissions.set( - [ - Permission.objects.get(codename=i + "_" + prfx) - for i in creation_object["permissions"] - ] - ) - - # Permissions assigned. - sub_errors["200_OK_prefix_permissions_update"] = db.messages( - parameters={"prefix": standardized} - )["200_OK_prefix_permissions_update"] - - # Add the sub-"errors". - errors["group"] = sub_errors - - # Append the possible "errors". - return_data.append(errors) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. 
- return Response(status=status.HTTP_200_OK, data=return_data) - - -def post_api_prefixes_token(request): - """Get Prefixes for a Token - - Get all available prefixes and their associated permissions for a given token. - The word 'Token' must be included in the header. - The token has already been validated, - so the user is guaranteed to exist. - A little expensive, but use the utility - we already have. Default will return flattened list of permissions. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. - """ - - prefixes = UserUtils.UserUtils().prefix_perms_for_user( - user_object=UserUtils.UserUtils().user_from_request(request=request).username, - flatten=False, - ) - return Response(status=status.HTTP_200_OK, data=prefixes) - - -def post_api_prefixes_token_flat(request): - """Get Prefixes for a Token - - Get all available prefixes and their associated permissions for a given token. - The word 'Token' must be included in the header. The token has already been - validated, so the user is guaranteed to exist. - A little expensive, but use the utility we already have. Default will return - flattened list of permissions. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. - """ - - prefixes = UserUtils.UserUtils().prefix_perms_for_user( - user_object=request.user, - flatten=True, - ) - - return Response(status=status.HTTP_200_OK, data=prefixes) - - -# --- Prefix --- # -@receiver(pre_save, sender=Prefix) -def create_permissions_for_prefix(sender, instance=None, **kwargs): - """Link prefix creation to permissions creation. 
- Check to see whether or not the permissions - have already been created for this prefix. - Create the macro-level, draft, and publish permissions. - Give FULL permissions to the prefix user owner - and their group. - - No try/except necessary here as the user's existence - has already been verified upstream. - - Source: https://stackoverflow.com/a/20361273 - """ - - # GroupInfo.objects.create( - # delete_members_on_group_deletion=False, - # description='Group administrators', - # group=Group.objects.get(name='group_admins'), - # max_n_members=-1, - # owner_user=User.objects.get(username='wheel') - # ) - - if 'test' in sys.argv or 'loaddata' in sys.argv: - return - - else: - owner_user = User.objects.get(username=instance.owner_user) - owner_group = Group.objects.get(name=instance.owner_group_id) - drafters = Group.objects.get(name=instance.prefix.lower() + "_drafter") - publishers = Group.objects.get(name=instance.prefix.lower() + "_publisher") - - try: - for perm in ["add", "change", "delete", "view", "draft", "publish"]: - Permission.objects.create( - name="Can " + perm + " BCOs with prefix " + instance.prefix, - content_type=ContentType.objects.get(app_label="api", model="bco"), - codename=perm + "_" + instance.prefix, - ) - new_perm = Permission.objects.get(codename=perm + "_" + instance.prefix) - owner_user.user_permissions.add(new_perm) - owner_group.permissions.add(new_perm) - publishers.permissions.add(new_perm) - if perm == "publish": - pass - else: - drafters.permissions.add(new_perm) - - except PermErrors.IntegrityError: - # The permissions already exist. - pass - - -@receiver(post_save, sender=Prefix) -def create_counter_for_prefix(sender, instance=None, created=False, **kwargs): - """Create prefix counter - - Creates a prefix counter for each prefix if it does not exist on save. 
- - Parameters - ---------- - sender: django.db.models.base.ModelBase - instance: api.model.prefix.Prefix - created: bool - """ - if 'test' in sys.argv or 'loaddata' in sys.argv or 'flush' in sys.argv: - return - else: - if created: - prefix_table.objects.create(n_objects=1, prefix=instance.prefix) - - -@receiver(post_delete, sender=Prefix) -def delete_permissions_for_prefix(sender, instance=None, **kwargs): - """Link prefix deletion to permissions deletion. - No risk of raising an error when using - a filter. - """ - - Permission.objects.filter(codename="add_" + instance.prefix).delete() - Permission.objects.filter(codename="change_" + instance.prefix).delete() - Permission.objects.filter(codename="delete_" + instance.prefix).delete() - Permission.objects.filter(codename="view_" + instance.prefix).delete() - Permission.objects.filter(codename="draft_" + instance.prefix).delete() - Permission.objects.filter(codename="publish_" + instance.prefix).delete() diff --git a/api/models.py b/api/models.py deleted file mode 100755 index 276b85a1..00000000 --- a/api/models.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -"""Models - -Explanation of optional fields: -https://stackoverflow.com/questions/16349545/optional-fields-in-django-models -TextField is used here because it has no character limit. - -Create a base model, then inherit for each table. -See the 4th example under "Model Inheritance" at -https://docs.djangoproject.com/en/3.1/topics/db/models/#model-inheritance - ---- Permissions imports --- # -Source: https://simpleisbetterthancomplex.com/tutorial/2016/07/22/how-to-extend-django-user-model.html -For setting the anonymous key. -The user model is straight from Django. -Referencing models. -Issue with timezones. -Source: https://stackoverflow.com/a/32411560 -Object-level permissions. -For token creation. 
-Source: https://www.django-rest-framework.org/api-guide/authentication/#generating-tokens -""" - -import sys -from django.db import models -from django.conf import settings -from django.contrib.auth.models import Group, User -from django.db.models.signals import post_save -from django.dispatch import receiver -from django.utils import timezone -from rest_framework.authtoken.models import Token - - -# Generic BCO model -class BCO(models.Model): - """BioComput Object Model. - - Attributes: - ----------- - contents: JSONField - BCO JSON contents - object_class: str - T.B.D. - object_id: str - BCO Object Identifier - owner_group: str - String representing the django.contrib.auth.models.Group that 'owns' the object - owner_user = models.ForeignKey(User, on_delete=models.CASCADE, to_field='username') - String representing the django.contrib.auth.models.User that 'owns' the object - prefix: str - Prefix for the BCO - schema: str - schema to which the BCO should be validated. Default is 'IEEE' - state:str - State of object. REFERENCED, PUBLISHED, DRAFT, and DELETE are currently accepted values. - last_update: DateTime - Date Time object for the last database change to this object - """ - - contents = models.JSONField() - object_class = models.TextField(blank=True, null=True) - object_id = models.TextField() - owner_group = models.ForeignKey(Group, on_delete=models.CASCADE, to_field="name") - owner_user = models.ForeignKey(User, on_delete=models.CASCADE, to_field="username") - prefix = models.CharField(max_length=5) - schema = models.TextField() - state = models.TextField() - last_update = models.DateTimeField() - - def __str__(self): - """String for representing the BCO model (in Admin site etc.).""" - return str(self.object_id) - - -# For registering new users. 
-class new_users(models.Model): - """Instead of using the User model, just use - a crude table to store the temporary information - when someone asks for a new account.""" - - email = models.EmailField() - temp_identifier = models.TextField(max_length=100) - # In case we are writing back to UserDB. - token = models.TextField(blank=True, null=True) - - # Which host to send the activation back to (i.e. UserDB). - hostname = models.TextField(blank=True, null=True) - # Issue with time zone, so implement the fix. - # Source: https://stackoverflow.com/a/32411560 - created = models.DateTimeField(default=timezone.now) - - def __email__(self): - return str(self.email) - - def __token__(self): - return str(self.token) - - def __hostname__(self): - return str(self.hostname) - - def __temp_identifier__(self): - return str(self.temp_identifier) - - -# def get_first_name(self): -# return self.first_name - -# User.add_to_class("__str__", get_first_name) - - -# --- Receivers --- # - - -# User and API Information are kept separate so that we can use it -# elsewhere easily. - -# Source: https://florimondmanca.github.io/djangorestframework-api-key/guide/#api-key-models -# Source: https://simpleisbetterthancomplex.com/tutorial/2016/07/22/how-to-extend-django-user-model.html - - -# --- User --- # - - -@receiver(post_save, sender=User) -def create_auth_token(sender, instance=None, created=False, **kwargs): - """Link user creation to token generation. - Source: https://www.django-rest-framework.org/api-guide/authentication/#generating-tokens - """ - if 'loaddata' in sys.argv: - return - else: - if created: - # The anonymous user's token is hard-coded - # in server.conf. - if instance.username == "anon": - # Create anon's record with the hard-coded key. - Token.objects.create(user=instance, key=settings.ANON_KEY) - else: - # Create a normal user's record. - Token.objects.create(user=instance) - - -# Link object deletion to object permissions deletion. - -# TODO:... 
diff --git a/api/permissions.py b/api/permissions.py deleted file mode 100644 index ff585bb9..00000000 --- a/api/permissions.py +++ /dev/null @@ -1,273 +0,0 @@ -# For getting objects out of the database. -# Apps -# Group object permissions -# Source: https://github.com/django-guardian/django-guardian#usage -# REST permissions. -# Source: https://stackoverflow.com/a/18646798 - -from django.apps import apps -from django.conf import settings -from api.scripts.utilities import DbUtils -from guardian.shortcuts import get_group_perms -from rest_framework import permissions -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User, Group - - -# ----- Admin Permissions ----- # - - -class RequestorInGroupAdminsGroup(permissions.BasePermission): - """Class docstring""" - - def has_permission(self, request, view): - """Check to see if the requester is in the group admins group. - Get the groups for this token (user). - This means getting the user ID for the token, - then the username.""" - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the prefix admins. - group_admins = Group.objects.filter(user=username, name="group_admins") - - return len(group_admins) > 0 - - -class RequestorInPrefixAdminsGroup(permissions.BasePermission): - """ - Check to see if the requester is in the prefix admins group. - - Get the groups for this token (user). - - Slight tweak in case the proper headers were not provided... - In particular, Swagger will cause an Internal Error 500 - if this logic is not here AND a view uses non-object-level - permissions (i.e. RequestorInPrefixAdminsGroup, see - ApiPrefixesPermissionsSet in views.py) - """ - - def has_permission(self, request, view): - """ - This means getting the user ID for the token, - then the username. - Get the prefix admins. 
- """ - - if settings.PREFIX is True: - return True - if "HTTP_AUTHORIZATION" in request.META: - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - - username = User.objects.get(id=user_id) - prefix_admins = Group.objects.filter(user=username, name="prefix_admins") - return len(prefix_admins) > 0 - - else: - return False - - -# ----- Table Permissions ----- # - - -# ----- Object Permissions ----- # - - -# Permissions based on REST. -# Source: https://stackoverflow.com/a/18646798 -class RequestorIsObjectOwner(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - # Check to see if the requester is in a particular owner group. - - # Get the groups for this token (user). - - # This means getting the user ID for the token, - # then the username. - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the groups for this username (at a minimum the user - # group created when the account was created should show up). - - # Now get the user's groups. - groups = Group.objects.filter(user=username) - - # Check that the user is in the ownership group. - - # Note that view permissions are NOT checked because - # the owner automatically has full permissions on the - # object. - owner_group = ( - apps.get_model(app_label="api", model_name=request.data["table_name"]) - .objects.get(object_id=request.data["object_id"]) - .owner_group - ) - - # Note: could use https://docs.djangoproject.com/en/3.2/topics/auth/customizing/#custom-permissions - # to set these, but group membership confers all read - # permissions. - - # Is this user in the ownership group? - return groups.filter(name=owner_group).exists() - - -class RequestorInObjectOwnerGroup(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - # Check to see if the requester is in a particular owner group. 
- - # Get the groups for this token (user). - - # This means getting the user ID for the token, - # then the username. - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the groups for this username (at a minimum the user - # group created when the account was created should show up). - - # Now get the user's groups. - groups = Group.objects.filter(user=username) - - # Check that the user is in the ownership group. - - # Note that view permissions are NOT checked because - # the owner automatically has full permissions on the - # object. - owner_group = ( - apps.get_model(app_label="api", model_name=request.data["table_name"]) - .objects.get(object_id=request.data["object_id"]) - .owner_group - ) - - # Note: could use https://docs.djangoproject.com/en/3.2/topics/auth/customizing/#custom-permissions - # to set these, but group membership confers all read - # permissions. - - # Is this user in the ownership group? - return groups.filter(name=owner_group).exists() - - -# Generic object-level permissions checker. -class HasObjectGenericPermission(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - # Check to see if the requester (group) has the given permission on the given object. - - # Don't need to check for table here as that is done in the step before... - - # *Must* return a True or False. - # Source: https://www.django-rest-framework.org/api-guide/permissions/#custom-permissions - - # This means getting the user ID for the token, - # then the username. - # Source: https://stackoverflow.com/questions/31813572/access-token-from-view - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # See if the group can do something with this object. 
- # Source: https://django-guardian.readthedocs.io/en/stable/userguide/check.html#get-perms - - # Get the group object first, then check. - if request.data["perm_type"] + "_" + request.data[ - "table_name" - ] in get_group_perms(username, obj): - - return True - - else: - - # User doesn't have the right permissions for this object. - return False - - -# Specific permissions (necessary to use logical operators -# when checking permissions). - -# These are all just specific cases of HasObjectGenericPermission -class HasObjectAddPermission(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the group object first, then check. - if "add_" + request.data["table_name"] in get_group_perms(username, obj): - - return True - - else: - - # User doesn't have the right permissions for this object. - return False - - -class HasObjectChangePermission(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the group object first, then check. - if "change_" + request.data["table_name"] in get_group_perms(username, obj): - - return True - - else: - - # User doesn't have the right permissions for this object. - return False - - -class HasObjectDeletePermission(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the group object first, then check. - if "delete_" + request.data["table_name"] in get_group_perms(username, obj): - - return True - - else: - - # User doesn't have the right permissions for this object. 
- return False - - -class HasObjectViewPermission(permissions.BasePermission): - def has_object_permission(self, request, view, obj): - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - username = User.objects.get(id=user_id) - - # Get the group object first, then check. - if "view_" + request.data["table_name"] in get_group_perms(username, obj): - - return True - - else: - - # User doesn't have the right permissions for this object. - return False diff --git a/api/rdb.sh b/api/rdb.sh deleted file mode 100755 index a510982a..00000000 --- a/api/rdb.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/bash - -clear - -find . -path "./migrations/*.py" -not -name "__init__.py" -delete -find . -path "./migrations/*.pyc" -delete - -cd .. - -rm db.sqlite3 - -python3.9 manage.py makemigrations -python3.9 manage.py migrate -python3.9 manage.py loaddata ./api/fixtures/metafixtures.json - -# Clear out all the junk. -#clear - -# Print the keys? -if [[ $2 == '-k' ]] -then - - echo " " - echo " " - echo "Anon key for the installation is..." - sqlite3 db.sqlite3 'SELECT B.key FROM auth_user AS A JOIN authtoken_token AS B ON A.id = B.user_id WHERE A.username = "anon";' - echo " " - echo " " - echo "Wheel key for the installation is..." 
- sqlite3 db.sqlite3 'SELECT B.key FROM auth_user AS A JOIN authtoken_token AS B ON A.id = B.user_id WHERE A.username = "wheel";' - echo " " - echo " " - -fi - -if [[ $1 == '-r' ]] -then - - python3.9 manage.py runserver 8000 - -fi diff --git a/api/request_definitions/GET.schema b/api/request_definitions/GET.schema deleted file mode 100755 index 5a407a34..00000000 --- a/api/request_definitions/GET.schema +++ /dev/null @@ -1,20 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/GET.schema", - "title": "GET", - "description": "A GET request.", - "type": "object", - "properties": - { - "GET_activate_account": - { - "description": "Activate a previously requested account.", - "$ref": "request_definitions/templates/GET_activate_account.schema" - }, - "GET_retrieve_available_schema": - { - "description": "The template for seeing what schema are available on the server to validate against.", - "$ref": "request_definitions/templates/GET_retrieve_available_schema.schema" - } - } -} diff --git a/api/request_definitions/POST.schema b/api/request_definitions/POST.schema deleted file mode 100755 index cf62d9ad..00000000 --- a/api/request_definitions/POST.schema +++ /dev/null @@ -1,40 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/POST.schema", - "title": "POST", - "description": "A POST request.", - "type": "object", - "properties": - { - "POST_objects_draft": - { - "description": "The template for drafting a new object.", - "$ref": "request_definitions/templates/POST_objects_draft.schema" - }, - "POST_objects_publish": - { - "description": "The template for publishing an object.", - "$ref": "request_definitions/templates/POST_objects_draft.schema" - }, - "POST_api_accounts_new": - { - "description": "Ask for a new account on the API.", - "$ref": "request_definitions/templates/POST_api_accounts_new.schema" - }, - "POST_object_listing_by_token": - { - "description": "Get all available 
objects for a given token.", - "$ref": "request_definitions/templates/POST_object_listing_by_token.schema" - }, - "POST_read_object": - { - "description": "The template for reading a(n) object(s).", - "$ref": "request_definitions/templates/POST_read_object.schema" - }, - "POST_validate_payload_against_schema": - { - "description": "The template for validating an object against a schema.", - "$ref": "request_definitions/templates/POST_validate_payload_against_schema.schema" - } - } -} diff --git a/api/request_definitions/templates/DELETE_delete_object_by_id.schema b/api/request_definitions/templates/DELETE_delete_object_by_id.schema deleted file mode 100755 index 52ecf43c..00000000 --- a/api/request_definitions/templates/DELETE_delete_object_by_id.schema +++ /dev/null @@ -1,55 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/product.schema.json", - "title": "POST_create_new_object", - "description": "A POST request for creating a new object.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "template": - { - "description": "The name of the request template.", - "type": "string", - "pattern": "CREATE_NEW_OBJECT" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - }, - "object_id": - { - "description": "Either 'NEW' or an existing JSON ID in the repository.", - "type": "string" - }, - "schema": - { - "description": "The schema under which the POSTed JSON falls.", - "type": "string" - }, - "payload": - { - "description": "The JSON contents to be stored", - "type": "object" - }, - "state": - { - "description": "Either 'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - } - }, - "required": - [ - "template", - "table", - "object_id", - "schema", - "payload", - "state" - ] - } -} \ No newline at end of file diff --git a/api/request_definitions/templates/GET_activate_account.schema 
b/api/request_definitions/templates/GET_activate_account.schema deleted file mode 100755 index 0ca090d2..00000000 --- a/api/request_definitions/templates/GET_activate_account.schema +++ /dev/null @@ -1,25 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/GET_activate_account.schema", - "title": "GET_activate_account", - "description": "A GET request for activating a previously requested account.", - "type": "object", - "properties": - { - "email": - { - "description": "The email to activate.", - "type": "string" - }, - "temp_identifier": - { - "description": "A temporary token used to activate an account.", - "type": "string" - } - }, - "required": - [ - "email", - "temp_identifier" - ] -} diff --git a/api/request_definitions/templates/GET_get_object_by_id.schema b/api/request_definitions/templates/GET_get_object_by_id.schema deleted file mode 100755 index 52ecf43c..00000000 --- a/api/request_definitions/templates/GET_get_object_by_id.schema +++ /dev/null @@ -1,55 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/product.schema.json", - "title": "POST_create_new_object", - "description": "A POST request for creating a new object.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "template": - { - "description": "The name of the request template.", - "type": "string", - "pattern": "CREATE_NEW_OBJECT" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - }, - "object_id": - { - "description": "Either 'NEW' or an existing JSON ID in the repository.", - "type": "string" - }, - "schema": - { - "description": "The schema under which the POSTed JSON falls.", - "type": "string" - }, - "payload": - { - "description": "The JSON contents to be stored", - "type": "object" - }, - "state": - { - "description": "Either 'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - } - }, 
- "required": - [ - "template", - "table", - "object_id", - "schema", - "payload", - "state" - ] - } -} \ No newline at end of file diff --git a/api/request_definitions/templates/GET_retrieve_available_schema.schema b/api/request_definitions/templates/GET_retrieve_available_schema.schema deleted file mode 100755 index 36af0e58..00000000 --- a/api/request_definitions/templates/GET_retrieve_available_schema.schema +++ /dev/null @@ -1,13 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/GET_retrieve_available_schema.schema", - "title": "GET_retrieve_available_schema", - "description": "A GET request to ask the API what schema are available to validate against.", - "type": "object", - "properties": - { - }, - "required": - [ - ] -} \ No newline at end of file diff --git a/api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema b/api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema deleted file mode 100755 index 52ecf43c..00000000 --- a/api/request_definitions/templates/POST_convert_existing_object_between_schemas.schema +++ /dev/null @@ -1,55 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/product.schema.json", - "title": "POST_create_new_object", - "description": "A POST request for creating a new object.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "template": - { - "description": "The name of the request template.", - "type": "string", - "pattern": "CREATE_NEW_OBJECT" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - }, - "object_id": - { - "description": "Either 'NEW' or an existing JSON ID in the repository.", - "type": "string" - }, - "schema": - { - "description": "The schema under which the POSTed JSON falls.", - "type": "string" - }, - "payload": - { - "description": "The JSON contents to be stored", - "type": 
"object" - }, - "state": - { - "description": "Either 'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - } - }, - "required": - [ - "template", - "table", - "object_id", - "schema", - "payload", - "state" - ] - } -} \ No newline at end of file diff --git a/api/request_definitions/templates/POST_convert_payload_to_schema.schema b/api/request_definitions/templates/POST_convert_payload_to_schema.schema deleted file mode 100755 index 3b648aca..00000000 --- a/api/request_definitions/templates/POST_convert_payload_to_schema.schema +++ /dev/null @@ -1,44 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/product.schema.json", - "title": "POST_BCO", - "description": "A POST request to the BCO API.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "object_id": - { - "description": "Either 'NEW' or an existing BCO ID in the repository.", - "type": "string", - "pattern": "^https://nih.gov/NIH_(\\d+)_v_(\\d+)$|^ftp://nih.gov/NIH_(\\d+)_v_(\\d+)$|^doi://nih.gov/NIH_(\\d+)_v_(\\d+)$|^http://nih.gov/NIH_(\\d+)_v_(\\d+)$|^NEW$" - }, - "schema": - { - "description": "The schema under which the POSTed BCO falls.", - "type": "string", - "pattern": "^IEEE 2791-2020$" - }, - "bco": - { - "description": "The BCO.", - "$ref": "https://opensource.ieee.org/2791-object/ieee-2791-schema/raw/master/2791object.json" - }, - "state": - { - "description": "Either 'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - } - }, - "required": - [ - "object_id", - "schema", - "bco", - "state" - ] - } -} \ No newline at end of file diff --git a/api/request_definitions/templates/POST_new_account.schema b/api/request_definitions/templates/POST_new_account.schema deleted file mode 100755 index b82f0811..00000000 --- a/api/request_definitions/templates/POST_new_account.schema +++ /dev/null @@ -1,20 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": 
"request_definitions/templates/POST_api_accounts_new.schema", - "title": "POST_api_accounts_new", - "description": "A POST request for creating a new account.", - "type": "object", - "properties": { - "email": { - "description": "The email to create a new account for.", - "type": "string" - }, - "hostname": { - "description": "Which host to activate the account on (send server information back to).", - "type": "string" - } - }, - "required": [ - "email" - ] -} diff --git a/api/request_definitions/templates/POST_object_listing_by_token.schema b/api/request_definitions/templates/POST_object_listing_by_token.schema deleted file mode 100755 index a3f9a951..00000000 --- a/api/request_definitions/templates/POST_object_listing_by_token.schema +++ /dev/null @@ -1,19 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/POST_object_listing_by_token.schema", - "title": "POST_object_listing_by_token", - "description": "A POST request for getting all objects associated with a token.", - "type": "object", - "properties": - { - "token": - { - "description": "The token to get objects for.", - "type": "string" - } - }, - "required": - [ - "token" - ] -} diff --git a/api/request_definitions/templates/POST_objects_draft.schema b/api/request_definitions/templates/POST_objects_draft.schema deleted file mode 100755 index 2639faf2..00000000 --- a/api/request_definitions/templates/POST_objects_draft.schema +++ /dev/null @@ -1,65 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/POST_create_new_object.schema", - "title": "POST_create_new_object", - "description": "A POST request for creating a new object.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - - "contents": - { - "description": "The JSON contents to be stored.", - "type": "object" - }, - "from_draft": - { - "description": "The draft ID to use to publish the object.", - "type": "string" - }, - 
"object_id": - { - "description": "The object ID to create a new version for.", - "type": "string" - }, - "owner_group": - { - "description": "Which group should own the object", - "type": "string" - }, - "retain_draft": - { - "description": "Retain the draft after publishing?", - "type": "string" - }, - "schema": - { - "description": "The schema under which the POSTed JSON falls.", - "type": "string" - }, - "state": - { - "description": "Either 'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - } - }, - "required": - [ - "from_draft", - "owner_group", - "retain_draft", - "schema", - "state", - "table" - ] - } -} diff --git a/api/request_definitions/templates/POST_objects_publish.schema b/api/request_definitions/templates/POST_objects_publish.schema deleted file mode 100755 index 2639faf2..00000000 --- a/api/request_definitions/templates/POST_objects_publish.schema +++ /dev/null @@ -1,65 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/POST_create_new_object.schema", - "title": "POST_create_new_object", - "description": "A POST request for creating a new object.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - - "contents": - { - "description": "The JSON contents to be stored.", - "type": "object" - }, - "from_draft": - { - "description": "The draft ID to use to publish the object.", - "type": "string" - }, - "object_id": - { - "description": "The object ID to create a new version for.", - "type": "string" - }, - "owner_group": - { - "description": "Which group should own the object", - "type": "string" - }, - "retain_draft": - { - "description": "Retain the draft after publishing?", - "type": "string" - }, - "schema": - { - "description": "The schema under which the POSTed JSON falls.", - "type": "string" - }, - "state": - { - "description": "Either 
'DRAFT' or 'PUBLISHED'.", - "type": "string", - "pattern": "^DRAFT$|^PUBLISHED$" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - } - }, - "required": - [ - "from_draft", - "owner_group", - "retain_draft", - "schema", - "state", - "table" - ] - } -} diff --git a/api/request_definitions/templates/POST_read_object.schema b/api/request_definitions/templates/POST_read_object.schema deleted file mode 100755 index 094df6ab..00000000 --- a/api/request_definitions/templates/POST_read_object.schema +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/POST_read_object.schema", - "title": "POST_read_object", - "description": "A POST request for reading a(n) object(s).", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "object_id": - { - "description": "The object ID to create a new version for.", - "type": "string" - }, - "table": - { - "description": "The table to write the new object to.", - "type": "string" - }, - "fields": - { - "description": "Which fields to read in the object", - "type": "string" - } - }, - "required": - [ - "table" - ] - } -} diff --git a/api/request_definitions/templates/POST_validate_payload_against_schema.schema b/api/request_definitions/templates/POST_validate_payload_against_schema.schema deleted file mode 100755 index a36f7e43..00000000 --- a/api/request_definitions/templates/POST_validate_payload_against_schema.schema +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "request_definitions/templates/POST_validate_payload_against_schema.schema", - "title": "POST_validate_payload_against_schema", - "description": "A POST request to validate an object against a schema.", - "type": "array", - "items": - { - "type": "object", - "properties": - { - "payload": - { - "description": "The table to write the new object to.", - "type": "object" - }, - 
"schema_server": - { - "description": "The name of the schema on the server to use.", - "type": "string" - }, - "schema_own": - { - "description": "A schema provided with the request.", - "type": "object" - } - }, - "required": - [ - "payload" - ] - } -} \ No newline at end of file diff --git a/api/scripts/method_specific/GET_activate_account.py b/api/scripts/method_specific/GET_activate_account.py deleted file mode 100755 index d442ecc0..00000000 --- a/api/scripts/method_specific/GET_activate_account.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -"""Activate Account - -""" - -from api.scripts.utilities import DbUtils - -# For url -from django.conf import settings - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# Source: https://codeloop.org/django-rest-framework-course-for-beginners/ - - -def GET_activate_account(username, temp_identifier): - """Activate Account - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. - """ - - db_utils = DbUtils.DbUtils() - - # The account has not been activated, but does it exist - # in the temporary table? - if db_utils.check_activation_credentials( - p_app_label="api", - p_model_name="new_users", - p_email=username, - p_temp_identifier=temp_identifier, - ): - - # The credentials match, so activate the account. - credential_try = db_utils.activate_account(p_email=username) - - if len(credential_try) > 0: - return Response(status=status.HTTP_200_OK,) - - # The credentials weren't good. 
- return Response(status=status.HTTP_403_FORBIDDEN) - - return Response(status=status.HTTP_403_FORBIDDEN) diff --git a/api/scripts/method_specific/GET_draft_object_by_id.py b/api/scripts/method_specific/GET_draft_object_by_id.py deleted file mode 100755 index 11599062..00000000 --- a/api/scripts/method_specific/GET_draft_object_by_id.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -"""Get a draft by ID - -See if the object exists, and if so, -see if the requestor has permissions -for it. -""" - -from api.models import BCO -from api.scripts.utilities import UserUtils -from rest_framework import status, authtoken -from rest_framework.response import Response -from guardian.shortcuts import get_objects_for_user -from authentication.selectors import get_user_from_auth_token - -def get_draft_object_by_id(do_id, request): - """Get a draft object - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. If the user has permission to view the object - it is returned. If not the response is HTTP_403_FORBIDDEN. - """ - - filtered = BCO.objects.filter(object_id__regex=rf"(.*?)/{do_id}", state="DRAFT") - - if filtered.exists(): - if len(filtered) > 1: - # There are multiple matches; this shouldn't be possible - return Response( - data="The contents of the draft could not be sent back because" - "there are multiple draft matches. Please contact and admin.", - status=status.HTTP_400_BAD_REQUEST, - ) - # Get the requestor's info. 
- try: - user = UserUtils.UserUtils().user_from_request(request=request) - except authtoken.models.Token.DoesNotExist: - user = get_user_from_auth_token(request.META.get("HTTP_AUTHORIZATION").split(" ")[1]) - user_perms = UserUtils.UserUtils().prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["view"] - ) - user_objects = get_objects_for_user( - user=user, perms=[], klass=BCO, any_perm=True - ) - - # Does the requestor have permissions for the object? - full_object_id = filtered.values_list("object_id", flat=True)[0] - objected = BCO.objects.get(object_id=full_object_id) - prefix = objected.prefix - object_permission = objected in user_objects - group_permission = ("view_" + prefix) in user_perms - - if object_permission is True or group_permission is True: - return Response(data=objected.contents, status=status.HTTP_200_OK) - - return Response( - data="The contents of the draft could not be sent back because" - " the requestor does not have appropriate permissions.", - status=status.HTTP_401_UNAUTHORIZED, - ) - # the root ID does not exist at all. - return Response( - data="That draft could not be found on the server.", - status=status.HTTP_404_NOT_FOUND, - ) diff --git a/api/scripts/method_specific/GET_published_object_by_id.py b/api/scripts/method_specific/GET_published_object_by_id.py deleted file mode 100755 index 93cb2250..00000000 --- a/api/scripts/method_specific/GET_published_object_by_id.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 - -import re -import json -from api.models import BCO -from django.forms.models import model_to_dict -from rest_framework import status -from rest_framework.response import Response -from semver import VersionInfo as Version -from typing import Optional, Tuple - -# TODO: This should be put into a universal place to grab from - also -# duplicated in POST_api_objects_drafts_token.py - -BASEVERSION = re.compile( - r"""[vV]? - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - )? 
- )? - """, - re.VERBOSE, -) - - -def coerce(version: str) -> Tuple[Version, Optional[str]]: - """ - Convert an incomplete version string into a semver-compatible Version - object - - * Tries to detect a "basic" version string (``major.minor.patch``). - * If not enough components can be found, missing components are - set to zero to obtain a valid semver version. - - :param str version: the version string to convert - :return: a tuple with a :class:`Version` instance (or ``None`` - if it's not a version) and the rest of the string which doesn't - belong to a basic version. - :rtype: tuple(:class:`Version` | None, str) - """ - - match = BASEVERSION.search(version) - if not match: - return (None, version) - - ver = { - key: 0 if value is None else value for key, value in match.groupdict().items() - } - ver = Version(**ver) - rest = match.string[match.end() :] # noqa:E203 - return ver, rest - - -def GET_published_object_by_id(oi_root): - """ - Get a published object given a root. - - See if the root ID even exists. - - We have to query twice because we don't - have a field in the BCO model to hold - the object version... - - Note the trailing slash in the regex search to prevent - sub-string matches (e.g. http://127.0.0.1:8000/BCO_5 and - http://127.0.0.1:8000/BCO_53 would both match the regex - http://127.0.0.1:8000/BCO_5 if we did not have the trailing - slash). - - """ - - # Note: This is not needed - removing out the underscore breaks the regex below, leaving in for the moment - # since I'm not sure why it was ever added (maybe there is a reason?) - # oi_root = oi_root.split("_")[0] + '{:06d}'.format(int(oi_root.split("_")[1])) - all_versions = list( - BCO.objects.filter( - object_id__regex=rf"(.*?)/{oi_root}/", state="PUBLISHED" - ).values_list("object_id", flat=True) - ) - # Get the latest version for this object if we have any. 
- if len(all_versions) > 0: - - # There was at least one version of the root ID, - # so now perform some logic based on whether or - # not a version was also passed. - - # First find the latest version of the object. - latest_versions = [i.split("/")[-1:][0] for i in all_versions] - l_version, _ = coerce(max(latest_versions, key=coerce)) - latest_version = latest_versions[-1] - # Kick back the latest version. - return Response( - data=model_to_dict(BCO.objects.get( - # object_id__regex=rf"{oi_root}/{l_version.major}.{l_version.minor}?.?{l_version.patch}", - object_id__regex=f'{oi_root}/{latest_version}', - state="PUBLISHED", - )), - status=status.HTTP_200_OK, - ) - - else: - # If all_versions has 0 length, then the - # the root ID does not exist at all. - print("No objects were found for the root ID provided.") - return Response( - data="No objects were found for the root ID provided.", - status=status.HTTP_404_NOT_FOUND, - ) diff --git a/api/scripts/method_specific/GET_published_object_by_id_with_version.py b/api/scripts/method_specific/GET_published_object_by_id_with_version.py deleted file mode 100755 index 801320b7..00000000 --- a/api/scripts/method_specific/GET_published_object_by_id_with_version.py +++ /dev/null @@ -1,114 +0,0 @@ -# The BCO model -from ...models import BCO - -# Responses -from rest_framework import status -from rest_framework.response import Response - - -def GET_published_object_by_id_with_version(oi_root, oi_version): - """ - Fetch a published BCO by the PREFIX, BCO NAME, and VERSION ID - """ - - #### - # We are assuming the oi_root looks something like this - # BCO_28 - # Where - # `BCO` is the prefix - # and `28` is the object name - #### - - # Split by '_' - underscores = oi_root.count("_") - if underscores < 1: - # ERROR - there should be an underscore separating the prefix and the BCO name - return Response( - data="This API requires that the prefix and the BCO name be separated by an underscore '_' in the object_id_root PATH 
variable.", - status=status.HTTP_400_BAD_REQUEST, - ) - - # TODO: This allows BCO Names to support underscores - not sure if that is valid though - # This can be 'fixed' by adding in a check for > 1 above - # Might be a better idea to split prefix, BCO name, and version into a three part get - bco_prefix, bco_name = oi_root.split("_", maxsplit=1) - - # retrieved = list( - # BCO.objects.filter( - # # contents__search=bco_name, - # prefix=bco_prefix, - # contents__provenance_domain__name=bco_name, - # contents__provenance_domain__version=oi_version, - # state='PUBLISHED' - # ).values_list( - # 'contents', - # flat=True - # ) - # ) - # The object ID either exists or it does not. - retrieved = list( - BCO.objects.filter( - object_id__regex=rf"(.*?)/{oi_root}/{oi_version}", state="PUBLISHED" - ).values_list("contents", flat=True) - ) - # Was the object found? - if len(retrieved) > 0: - # Kick it back. - return Response(data=retrieved, status=status.HTTP_200_OK) - else: - # If all_versions has 0 length, then the - # the root ID does not exist at all. - print("No objects were found for the root ID and version provided.") - return Response( - data="No objects were found for the root ID and version provided.", - status=status.HTTP_400_BAD_REQUEST, - ) - - # TODO: This code from here on down appears to be unreachable? The above if/else will always return the request - # Maybe this is placeholder code for something? - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - - # First, get the table based on the requested published object. - table_name = (oi_root.split("_")[0] + "_publish").lower() - - # Does the table exist? - # TODO: replace with better table call... - available_tables = settings.MODELS["json_object"] - - if table_name in available_tables: - - # Construct the object ID. - constructed = object_id = ( - settings.PUBLIC_HOSTNAME + "/" + oi_root + "/" + oi_version - ) - - # Does the object exist in the table? 
- if ( - apps.get_model(app_label="api", model_name=table_name) - .objects.filter(object_id=constructed) - .exists() - ): - - # Get the object, then check the permissions. - objected = apps.get_model( - app_label="api", model_name=table_name - ).objects.get(object_id=constructed) - - return Response( - data=serializers.serialize( - "json", - [ - objected, - ], - ), - status=status.HTTP_200_OK, - ) - - else: - - return Response(status=status.HTTP_400_BAD_REQUEST) - - else: - - return Response(status=status.HTTP_400_BAD_REQUEST) diff --git a/api/scripts/method_specific/GET_retrieve_available_schema.py b/api/scripts/method_specific/GET_retrieve_available_schema.py deleted file mode 100755 index bcd643cc..00000000 --- a/api/scripts/method_specific/GET_retrieve_available_schema.py +++ /dev/null @@ -1,49 +0,0 @@ -# For the folder search. -from ..utilities import FileUtils - -from ..utilities import JsonUtils -import json - -# Put try catch in later to indicate failure to load schema... - - -def GET_retrieve_available_schema(bulk_request): - - # We don't use settings.VALIDATION_TEMPLATES because - # that contains paths on the server which we don't - # want to reveal. - - # Get the schema from the validation_definitions folder. - folder_schema = FileUtils.FileUtils().get_folder_tree( - search_folder="validation_definitions/" - )["paths"] - - # Define a list to hold the processed paths. - processed_paths = [] - - # Strip out everything that is above the server folder level. - for path in folder_schema: - - # Split the path up to help construct the root folder. - file_name_split = path.split("/") - - # Where is the 'validation_definitions/' item? - vd_index = file_name_split.index("validation_definitions") - - # Collapse everything after this index. - collapsed = "/".join(file_name_split[vd_index + 1 :]) - - # Set the name. - processed_paths.append(collapsed) - - # Create a usable structure. 
- - # Source: https://stackoverflow.com/questions/9618862/how-to-parse-a-directory-structure-into-dictionary - dct = {} - - for item in processed_paths: - p = dct - for x in item.split("/"): - p = p.setdefault(x, {}) - - return {"request_status": "success", "contents": dct} diff --git a/api/scripts/method_specific/POST_api_accounts_describe.py b/api/scripts/method_specific/POST_api_accounts_describe.py deleted file mode 100755 index aca72878..00000000 --- a/api/scripts/method_specific/POST_api_accounts_describe.py +++ /dev/null @@ -1,27 +0,0 @@ -# For getting the user's information. -from ..utilities import UserUtils -from django.contrib.auth.models import User -from rest_framework.authtoken.models import Token - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# Source: https://codeloop.org/django-rest-framework-course-for-beginners/ - - -def POST_api_accounts_describe(token): - - # The token has already been validated, - # so the user is guaranteed to exist. - - # A little bit of processing required here... - processed = token.split(" ")[1] - - # Instantiate UserUtils - uu = UserUtils.UserUtils() - # Get the user's information - return Response( - data=uu.get_user_info(username=Token.objects.get(key=processed).user.username), - status=status.HTTP_200_OK, - ) diff --git a/api/scripts/method_specific/POST_api_accounts_new.py b/api/scripts/method_specific/POST_api_accounts_new.py deleted file mode 100755 index 2c5dda84..00000000 --- a/api/scripts/method_specific/POST_api_accounts_new.py +++ /dev/null @@ -1,155 +0,0 @@ -# For interacting with the database -from ..utilities import DbUtils - -# For the user lookup -from django.contrib.auth.models import User - -# For sending e-mails. 
-# Source: https://www.urlencoder.io/python/ -# Source: https://realpython.com/python-send-email/#sending-fancy-emails -# Source: https://docs.djangoproject.com/en/3.2/topics/email/#send-mail -from django.core.mail import send_mail -from django.conf import settings -import urllib.parse - -# Development account activation -from .GET_activate_account import GET_activate_account - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# For getting user tokens -from rest_framework.authtoken.models import Token - -# For generating a random temp identifier - -# Source: https://stackoverflow.com/questions/976577/random-hash-in-python -import uuid - - -# Source: https://codeloop.org/django-rest-framework-course-for-beginners/ - - -def POST_api_accounts_new(request): - # An e-mail is provided, and if the e-mail already exists - # as an account, then return 409. - try: - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - - # Does the account associated with this e-mail already - # exist in either a temporary or a permanent user profile? - if ( - db.check_user_exists( - p_app_label="api", p_model_name="new_users", p_email=request.data["email"] - ) - is None - ): - if User.objects.filter(email=request.data["email"]).exists(): - # Account has already been activated. - return Response( - status=status.HTTP_409_CONFLICT, - data={"message": "Account has already been activated."}, - ) - - # The email has not already been asked for and - # it has not been activated. - - # Generate a temp ID to use so that the account can - # be activated. - - # The data is based on whether or not a token was provided. - - # Create a temporary identifier. 
- temp_identifier = uuid.uuid4().hex - if "token" in request.data and "hostname" in request.data: - p_data = { - "email": request.data["email"], - "temp_identifier": temp_identifier, - "hostname": request.data["hostname"], - "token": request.data["token"], - } - - else: - p_data = { - "email": request.data["email"], - "temp_identifier": temp_identifier, - } - - objects_written = db.write_object( - p_app_label="api", - p_model_name="new_users", - p_fields=["email", "temp_identifier", "hostname", "token"], - p_data=p_data, - ) - - if objects_written < 1: - # There is a problem with the write. - return Response( - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - data="Not able to save the new account.", - ) - - # Send an e-mail to let the requestor know that they - # need to follow the activation link within 10 minutes. - - # Source: https://realpython.com/python-send-email/#sending-fancy-emails - - activation_link = "" - template = "" - - activation_link = ( - settings.PUBLIC_HOSTNAME - + "/api/accounts/activate/" - + urllib.parse.quote(request.data["email"]) - + "/" - + temp_identifier - ) - - template = '

Please click this link within the next 10 minutes to activate your BioCompute Portal account: {}.

'.format( - activation_link, activation_link - ) - - try: - send_mail( - subject="Registration for BioCompute Portal", - message="Testing.", - html_message=template, - from_email="mail_sender@portal.aws.biochemistry.gwu.edu", - recipient_list=[request.data["email"]], - fail_silently=False, - ) - print("Email signal sent") - - except Exception as error: - print("activation_link", activation_link) - print('ERROR: ', error) - return Response( - status=status.HTTP_201_CREATED, data={ - "message": f"Not able to send authentication email: {error}", - "activation_link": f"{activation_link}" - } - ) - - if request.data["token"] == "SampleToken": - print("testing with SampleToken") - return Response( - status=status.HTTP_201_CREATED, data={ - "message": "Testing token received", - "activation_link": f"{activation_link}" - } - ) - - return Response(status=status.HTTP_201_CREATED) - - else: - return Response( - status=status.HTTP_409_CONFLICT, - data={"message": "Account has already been requested."}, - ) - except: - return Response( - status=status.HTTP_400_BAD_REQUEST, - data={"message": "Bad request format."}, - ) \ No newline at end of file diff --git a/api/scripts/method_specific/POST_api_objects_drafts_create.py b/api/scripts/method_specific/POST_api_objects_drafts_create.py deleted file mode 100755 index b9a74d27..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_create.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 - -"""Create BCO Draft - --------------------- -Creates a new BCO draft object. 
-""" -from api.models import BCO -from api.scripts.utilities import DbUtils, UserUtils -from api.model.prefix import prefix_table -from django.conf import settings -from django.contrib.auth.models import Group -from django.utils import timezone -from rest_framework import status, authtoken -from rest_framework.response import Response -from authentication.selectors import get_user_from_auth_token - -def post_api_objects_drafts_create(request): - """Create BCO Draft - - Parameters - ---------- - request: rest_framework.request. - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into - arbitrary media types. - """ - - db_utils = DbUtils.DbUtils() - try: - user = UserUtils.UserUtils().user_from_request(request=request) - except authtoken.models.Token.DoesNotExist: - user = get_user_from_auth_token(request.META.get("HTTP_AUTHORIZATION").split(" ")[1]) - prefix_perms = UserUtils.UserUtils().prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["add"] - ) - - # Define the bulk request. - bulk_request = request.data["POST_api_objects_draft_create"] - - # Get the object naming information. - object_naming_info = settings.OBJECT_NAMING - root_uri = settings.OBJECT_NAMING["root_uri"] - # Construct an array to return the objects. - returning = [] - any_failed = False - - # Since bulk_request is an array, go over each - # item in the array. - - for creation_object in bulk_request: - prefix = creation_object["prefix"].upper() - # Require the macro-level and draft-specific permissions. 
- if "add_" + prefix in prefix_perms and "draft_" + prefix in prefix_perms: - prefix_counter = prefix_table.objects.get(prefix=prefix) - if "object_id" in creation_object: - if BCO.objects.filter(object_id=creation_object["object_id"]).exists(): - returning.append( - db_utils.messages( - parameters={"object_id": creation_object["object_id"]} - )["409_object_conflict"] - ) - any_failed = True - continue - constructed_obj_id = creation_object["object_id"] - else: - object_num = format(prefix_counter.n_objects, "06d") - constructed_obj_id = ( - root_uri + "/" + prefix + "_" + object_num + "/DRAFT" - ) - creation_object["object_id"] = constructed_obj_id - - if Group.objects.filter( - name=creation_object["owner_group"].lower() - ).exists(): - - # TODO: abstract this out to DbUtils. - # constructed_name = object_naming_info["uri_regex"].replace( - # "root_uri", object_naming_info["root_uri"] - # ) - # constructed_name = constructed_name.replace("prefix", prefix) - - # prefix_location = constructed_name.index(prefix) - # prefix_length = len(prefix) - # constructed_name = constructed_name[0 : prefix_location + prefix_length] - # - # creation_object["object_id"] = ( - # constructed_name - # + "_" - # + "{:06d}".format(prefix_counter.n_objects) - # + "/DRAFT" - # ) - # Make sure to create the object ID field in our draft. - creation_object["contents"]["object_id"] = constructed_obj_id - # Instantiate the owner group as we'll need it a few times here. - owner_group = Group.objects.get(name=creation_object["owner_group"]) - - # Django wants a primary key for the Group... - creation_object["owner_group"] = owner_group.name - - # Set the owner user (the requestor). - creation_object["owner_user"] = user.username - - # Give the creation object the prefix. - creation_object["prefix"] = prefix - - # This is a DRAFT. - creation_object["state"] = "DRAFT" - - # Set the datetime properly. - creation_object["last_update"] = timezone.now() - - # Write to the database. 
- objects_written = db_utils.write_object( - p_app_label="api", - p_model_name="bco", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=creation_object, - ) - - if objects_written < 1: - # Issue with writing out to DB - returning.append( - db_utils.messages(parameters={})["400_bad_request"] - ) - any_failed = True - prefix_counter.n_objects = prefix_counter.n_objects + 1 - prefix_counter.save() - # Update the request status. - returning.append( - db_utils.messages( - parameters={"object_id": creation_object["object_id"]} - )["201_create"] - ) - - else: - # Update the request status. - returning.append(db_utils.messages(parameters={})["400_bad_request"]) - any_failed = True - - else: - # Update the request status. - returning.append( - db_utils.messages(parameters={"prefix": creation_object["prefix"]})[ - "401_prefix_unauthorized" - ] - ) - any_failed = True - - if any_failed: - return Response(status=status.HTTP_207_MULTI_STATUS, data=returning) - - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_delete.py b/api/scripts/method_specific/POST_api_objects_drafts_delete.py deleted file mode 100755 index 95d36b69..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_delete.py +++ /dev/null @@ -1,117 +0,0 @@ -# BCO model -from ...models import bco - -# For getting objects out of the database. -from ..utilities import DbUtils - -# User information -from ..utilities import UserUtils - -# Permisions for objects -from guardian.shortcuts import get_perms - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# Source: https://codeloop.org/django-rest-framework-course-for-beginners/ - - -def POST_api_objects_drafts_delete(incoming): - - # Take the bulk request and delete a draft object from it. - - # Instantiate any necessary imports. 
- db = DbUtils.DbUtils() - uu = UserUtils.UserUtils() - - # The token has already been validated, - # so the user is guaranteed to exist. - - # Get the User object. - user = uu.user_from_request(rq=incoming) - - # Get the user's prefix permissions. - px_perms = uu.prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["add"] - ) - - # Define the bulk request. - bulk_request = incoming.data["POST_api_objects_drafts_delete"] - - # Construct an array to return the objects. - returning = [] - - # Since bulk_request is an array, go over each - # item in the array. - for deletion_object in bulk_request: - - # Get the prefix for this draft. - standardized = deletion_object["object_id"].split("/")[-1].split("_")[0].upper() - - # Does the requestor have delete permissions for - # the *prefix*? - if "delete_" + standardized in px_perms: - - # The requestor has delete permissions for - # the prefix, but do they have object-level - # delete permissions? - - # This can be checked by seeing if the requestor - # is the object owner OR they are a user with - # object-level delete permissions OR if they are in a - # group that has object-level change permissions. - - # To check these options, we need the actual object. - if bco.objects.filter(object_id=deletion_object["object_id"]).exists(): - - objected = bco.objects.get(object_id=deletion_object["object_id"]) - - # We don't care where the delete permission comes from, - # be it a User permission or a Group permission. - all_permissions = get_perms(user, objected) - - if ( - user.username == objected.owner_user.username - or "delete_" + standardized in all_permissions - ): - - # Delete the object. - objected.delete() - - # Update the request status. - returning.append( - db.messages( - parameters={"object_id": deletion_object["object_id"]} - )["200_OK_object_delete"] - ) - - else: - - # Insufficient permissions. 
- returning.append( - db.messages(parameters={})["403_insufficient_permissions"] - ) - - else: - - # Couldn't find the object. - returning.append( - db.messages(parameters={"object_id": deletion_object["object_id"]}) - )["404_object_id"] - - else: - - # Update the request status. - returning.append( - db.messages(parameters={"prefix": standardized})[ - "401_prefix_unauthorized" - ] - ) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - # For example, a table may not have been found for the first - # requested draft. - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_modify.py b/api/scripts/method_specific/POST_api_objects_drafts_modify.py deleted file mode 100755 index 73c55241..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_modify.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 -"""Modify Draft Object - --------------------- -Modifies a BCO object. The BCO object must be a draft in order to be -modifiable. The contents of the BCO will be replaced with the new -contents provided in the request body. -""" - -from api.models import BCO -from api.scripts.utilities import DbUtils -from api.scripts.utilities import UserUtils - -from django.utils import timezone -from guardian.shortcuts import get_perms -from rest_framework import status -from rest_framework.response import Response - -# Source: https://codeloop.org/django-rest-framework-course-for-beginners/ - - -def post_api_objects_drafts_modify(request): - """Modify Draft - - Take the bulk request and modify a draft object from it. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into arbitrary - media types. 
As this view is for a bulk operation, status 200 means - that the request was successfully processed for each item in the - request. A status of 207 means that some of the requests were - successfull. - """ - - db_utils = DbUtils.DbUtils() - user = UserUtils.UserUtils().user_from_request(request=request) - try: - bulk_request = request.data["POST_api_objects_drafts_modify"] - except KeyError as error: - return Response(status=status.HTTP_400_BAD_REQUEST, data={ - 'KeyError': f'{str(error)}' - }) - px_perms = UserUtils.UserUtils().prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["add"] - ) - - # Construct an array to return the objects. - returning = [] - any_failed = False - for draft_object in bulk_request: - try: - # Get the prefix for this draft. - prefix = draft_object["object_id"].split("/")[-2].split("_")[0].upper() - - # Does the requestor have change permissions for - # the *prefix*? - - # TODO: add permission setting view... - # if 'change_' + prefix in px_perms: - - if "add_" + prefix in px_perms: - - # The requestor has change permissions for - # the prefix, but do they have object-level - # change permissions? - - # This can be checked by seeing if the requestor - # is the object owner OR they are a user with - # object-level change permissions OR if they are in a - # group that has object-level change permissions. - # To check these options, we need the actual object. 
- if draft_object["object_id"] not in draft_object["contents"]["object_id"]: - returning.append( - db_utils.messages( - parameters={ - "object_id": draft_object["contents"]["object_id"], - "draft_object_id": draft_object["object_id"], - } - )["409_draft_object_id_conflict"] - ) - any_failed = True - continue - - if BCO.objects.filter( - object_id=draft_object["contents"]["object_id"] - ).exists(): - objected = BCO.objects.get( - object_id=draft_object["contents"]["object_id"] - ) - - # We don't care where the view permission comes from, - # be it a User permission or a Group permission. - all_permissions = get_perms(user, objected) - # TODO: add permission setting view... - if ( - user.username == objected.owner_user.username - or "add_" + prefix in px_perms - ): - - # # User does *NOT* have to be in the owner group! - # # to assign the object's group owner. - # if Group.objects.filter( - # name = draft_object['owner_group'].lower() - # ).exists(): - # - # Update the object. - # *** COMPLETELY OVERWRITES CONTENTS!!! *** - objected.contents = draft_object["contents"] - - if "state" in draft_object: - if draft_object["state"] == "DELETE": - objected.state = "DELETE" - - # Set the update time. - objected.last_update = timezone.now() - - # Save it. - objected.save() - - # Update the request status. - returning.append( - db_utils.messages( - parameters={"object_id": draft_object["object_id"]} - )["200_update"] - ) - else: - # Insufficient permissions. 
- returning.append( - db_utils.messages(parameters={ - })["403_insufficient_permissions"] - ) - any_failed = True - - else: - returning.append( - db_utils.messages( - parameters={"object_id": draft_object["object_id"]} - )["404_object_id"] - ) - any_failed = True - else: - returning.append( - db_utils.messages(parameters={"prefix": prefix})[ - "401_prefix_unauthorized" - ] - ) - any_failed = True - except: - returning.append( - db_utils.messages(parameters={})[ - "400_bad_request" - ] - ) - any_failed = True - - if any_failed and len(returning) == 1: - if returning[0]["status_code"] == "403": - return Response(status=status.HTTP_403_FORBIDDEN, data=returning) - else: - return Response(status=status.HTTP_207_MULTI_STATUS, data=returning) - if any_failed and len(returning) > 1: - return Response(status=status.HTTP_207_MULTI_STATUS, data=returning) - - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_permissions.py b/api/scripts/method_specific/POST_api_objects_drafts_permissions.py deleted file mode 100755 index e5395c99..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_permissions.py +++ /dev/null @@ -1,158 +0,0 @@ -# BCO model -from api.models import BCO - -# For getting objects out of the database. -from ..utilities import DbUtils - -# User information -from ..utilities import UserUtils - -# Group info -from django.contrib.auth.models import Group - -# Permisions for objects -from guardian.shortcuts import get_groups_with_perms, get_perms, get_user_perms - -# Responses -from rest_framework import status -from rest_framework.response import Response - - -def POST_api_objects_drafts_permissions(incoming): - """ - Get BCO Permissions - - Returns the permissions for requested BCO objects. - """ - - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - uu = UserUtils.UserUtils() - - # The token has already been validated, - # so the user is guaranteed to exist. 
- - # Get the User object. - user = uu.user_from_request(rq=incoming) - - # Get the user's prefix permissions. - px_perms = uu.prefix_perms_for_user(flatten=True, user_object=user) - - # Define the bulk request. - bulk_request = incoming.data["POST_api_objects_drafts_permissions"] - - # Construct an array to return the objects. - returning = [] - any_failed = False - - # Since bulk_request is an array, go over each - # item in the array. - for creation_object in bulk_request: - - # Get the prefix for this object. - standardized = creation_object["object_id"].split("/")[-1].split("_")[0].upper() - - # Does the requestor have view permissions for - # the *prefix*? - if "view_" + standardized in px_perms: - print( - "bulk_request", - list(BCO.objects.filter(object_id=creation_object["object_id"])), - ) - # The requestor has change view for - # the prefix, but do they have object-level - # view permissions? - - # This can be checked by seeing if the requestor - # is the object owner OR they are a user with - # object-level view permissions OR if they are in a - # group that has object-level view permissions. - - # To check these options, we need the actual object. - if BCO.objects.filter(object_id=creation_object["object_id"]).exists(): - objected = BCO.objects.get(object_id=creation_object["object_id"]) - - # We don't care where the view permission comes from, - # be it a User permission or a Group permission. - - # This is a bit redundant since we are getting the permissions - # again below but it's a quick fix to find - # basic view permissions for this user and object. - all_permissions = get_perms(user, objected) - - if ( - user.username == objected.owner_user.username - or "view_" + standardized in all_permissions - ): - - # Kick back the permissions, - # *** but only for this requestor (user) ***. - - # Create a dictionary to return the permissions. 
- perms = {"username": {}, "group_names": {}} - - # We want to return the permissions in fine detail - # by user permissions and group permissions. - up = get_user_perms(user, objected) - - perms["username"][user.username] = list(up) - - # Get user's groups. - user_groups = list( - Group.objects.filter(user=user.pk).values_list( - "name", flat=True - ) - ) - - gp = get_groups_with_perms(objected, attach_perms=True) - - # See which of the group permissions apply to - # the user's groups. - for g, p in gp.items(): - if g.name in user_groups: - perms["group_names"][g.name] = p - - # print(perms) - - # Update the request status. - returning.append( - db.messages( - parameters={ - "object_id": creation_object["object_id"], - "object_perms": perms, - } - )["200_OK_object_permissions"] - ) - else: - # Insufficient permissions. - returning.append( - db.messages(parameters={})["403_insufficient_permissions"] - ) - any_failed = True - else: - # Couldn't find the object. - returning.append( - db.messages(parameters={"object_id": creation_object["object_id"]})[ - "404_object_id" - ] - ) - any_failed = True - - else: - # Update the request status. - returning.append( - db.messages(parameters={"prefix": standardized})[ - "401_prefix_unauthorized" - ] - ) - any_failed = True - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - # For example, a table may not have been found for the first - # requested draft. - if any_failed: - # If any of the requests failed, lets let the caller know so they can parse the returning information. 
- return Response(status=status.HTTP_300_MULTIPLE_CHOICES, data=returning) - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py b/api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py deleted file mode 100755 index cf0aaaf1..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_permissions_set.py +++ /dev/null @@ -1,242 +0,0 @@ -# BCO model -from ...models import BCO - -# For getting objects out of the database. -from ..utilities import DbUtils - -# User information -from ..utilities import UserUtils - -# Permisions for objects -from guardian.shortcuts import ( - assign_perm, - get_perms, - get_groups_with_perms, - get_users_with_perms, - remove_perm, -) -from django.contrib.auth.models import Group, User, Permission - -# Responses -from rest_framework import status -from rest_framework.response import Response - - -def POST_api_objects_drafts_permissions_set(incoming): - """ - Set the permissions for given objects. - - """ - - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - uu = UserUtils.UserUtils() - - # The token has already been validated, - # so the user is guaranteed to exist. - - # Get the User object. - user = uu.user_from_request(rq=incoming) - - # Get the user's prefix permissions. - px_perms = uu.prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["change"] - ) - - # Define the bulk request. - bulk_request = incoming.data["POST_api_objects_drafts_permissions_set"] - - # Construct an array to return the objects. - returning = [] - - # Since bulk_request is an array, go over each - # item in the array. - for permission_object in bulk_request: - - # Get the prefix for this object. - standardized = ( - permission_object["object_id"].split("/")[-1].split("_")[0].upper() - ) - - # Does the requestor have any change - # permissions for the prefix? 
- - # Notice that we do not look for "add" - # or "delete" permissions even though - # these are also object-level permissions. - - # In essence, we are asking whether or not - # the requestor can change any object - # under this prefix. - if "change_" + standardized in px_perms: - - # The requestor has change for - # the prefix, but do they have object-level - # change permissions? - - # This can be checked by seeing if the requestor - # is the object owner OR they are a user with - # object-level change permissions OR if they are in a - # group that has object-level change permissions. - - # To check these options, we need the actual object. - if BCO.objects.filter(object_id=permission_object["object_id"]).exists(): - - objected = BCO.objects.get(object_id=permission_object["object_id"]) - - # We don't care where the change permission comes from, - # be it a User permission or a Group permission. - all_permissions = get_perms(user, objected) - - if ( - user.username == objected.owner_user.username - or "change_" + objected.object_id in all_permissions - ): - - if "actions" in permission_object: - - # Set the working object to the actions. - action_set = permission_object["actions"] - - # Removals are processed first, then additions. - - # Remove the permissions provided, if any. - # TODO: This doesn't look like it would work here. 
- if "remove_permissions" in action_set: - for perm, assignee in action_set["remove_permissions"]: - if assignee == "users": - # TODO: if assignee is actually a string users, this will just loop through the characters - for u in assignee: - if uu.check_user_exists(un=u): - remove_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=User.objects.get( - username=u - ), - obj=objected, - ) - if assignee == "groups": - for g in assignee: - if uu.check_group_exists(n=g): - remove_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=Group.objects.get(name=g), - obj=objected, - ) - - if "full_permissions" in action_set: - for up, perms in get_users_with_perms( - obj=objected, attach_perms=True - ).items(): - for perm in perms: - remove_perm( - perm=perm, user_or_group=up, obj=objected - ) - - for gp, perms in get_groups_with_perms( - obj=objected, attach_perms=True - ).items(): - for perm in perms: - remove_perm( - perm=perm, user_or_group=gp, obj=objected - ) - - for perm, assignee in action_set[ - "full_permissions" - ].items(): - if assignee == "users": - for u in assignee: - if uu.check_user_exists(un=u): - assign_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=User.objects.get( - username=u - ), - obj=objected, - ) - - if assignee == "groups": - for g in assignee: - if uu.check_group_exists(n=g): - assign_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=Group.objects.get(name=g), - obj=objected, - ) - - if "add_permissions" in action_set: - for perm, assignee in action_set["add_permissions"].items(): - if assignee == "users": - for u in assignee: - if uu.check_user_exists(un=u): - assign_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=User.objects.get( - username=u - ), - obj=objected, - ) - if 
assignee == "groups": - for g in assignee: - if uu.check_group_exists(n=g): - assign_perm( - perm=Permission.objects.get( - codename=perm - + "_" - + objected.object_id - ), - user_or_group=Group.objects.get(name=g), - obj=objected, - ) - - returning.append( - db.messages(parameters={"object_id": objected.object_id})[ - "200_OK_object_permissions_set" - ] - ) - else: - # Insufficient permissions. - returning.append( - db.messages(parameters={})["403_insufficient_permissions"] - ) - else: - # Couldn't find the object. - returning.append( - db.messages( - parameters={"object_id": permission_object["object_id"]} - )["404_object_id"] - ) - else: - # Update the request status. - returning.append( - db.messages(parameters={"prefix": standardized})[ - "401_prefix_unauthorized" - ] - ) - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - # For example, a table may not have been found for the first - # requested draft. 
- return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_publish.py b/api/scripts/method_specific/POST_api_objects_drafts_publish.py deleted file mode 100755 index e0ee71f4..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_publish.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python3 -"""Publish draft - -publish a draft -""" - -from api.models import BCO -from api.model.prefix import prefix_table -from api.scripts.utilities import DbUtils, UserUtils -from django.contrib.auth.models import Group -from django.utils import timezone -from guardian.shortcuts import get_perms -from rest_framework import status, authtoken -from rest_framework.response import Response -from authentication.selectors import get_user_from_auth_token - -def post_api_objects_drafts_publish(request): - """Publish draft - - publish a draft - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. - - Returns - ------- - rest_framework.response.Response - An HttpResponse that allows its data to be rendered into arbitrary - media types. As this view is for a bulk operation, status 200 means - that the request was successfully processed for each item in the - request. A status of 300 means that some of the requests were - successfull. 
- """ - - returning = [] - any_failed = False - db_utils = DbUtils.DbUtils() - - try: - user = UserUtils.UserUtils().user_from_request(request=request) - except authtoken.models.Token.DoesNotExist: - user = get_user_from_auth_token(request.META.get("HTTP_AUTHORIZATION").split(" ")[1]) - prefix_perms = UserUtils.UserUtils().prefix_perms_for_user( - flatten=True, user_object=user - ) - try: - bulk_request = request.data["POST_api_objects_drafts_publish"] - except: - return Response(status=status.HTTP_400_BAD_REQUEST, data={"Request format not accepted."}) - - for publish_object in bulk_request: - if "draft_id" not in publish_object: - returning.append( - db_utils.messages(parameters={})[ - "400_bad_request" - ] - ) - any_failed = True - continue - - draft_exists = BCO.objects.filter( - object_id=publish_object["draft_id"], state="DRAFT" - ).exists() - - if draft_exists is False: - returning.append( - db_utils.messages(parameters={"object_id": publish_object["draft_id"]})[ - "404_object_id" - ] - ) - any_failed = True - continue - - objected = BCO.objects.get(object_id=publish_object["draft_id"]) - new_version = objected.contents["provenance_domain"]["version"] - prefix = publish_object["prefix"].upper() - prefix_counter = prefix_table.objects.get(prefix=prefix) - draft_id = publish_object["draft_id"] - - if publish_object.get("delete_draft") is not None: - delete_draft = publish_object["delete_draft"] - else: - delete_draft = False - - if "object_id" not in publish_object: - object_id = publish_object["draft_id"].split("/")[0:4] - object_id.append(new_version) - object_id = "/".join(object_id) - else: - object_id = publish_object["object_id"] - - versioned = {"published_id": object_id} - # versioned = db_utils.check_version_rules( - # published_id=object_id - # ) - prefix_auth = "publish_" + prefix in prefix_perms - object_exists = BCO.objects.filter(object_id=object_id).exists() - - if object_exists is True: - print(object_id) - parameters = {"object_id": object_id} 
- returning.append(db_utils.messages(parameters)["409_object_conflict"]) - any_failed = True - continue - - if draft_exists is True: - all_permissions = get_perms(user, objected) - is_owner = user.username == objected.owner_user.username - owner_group = Group.objects.get(name=user.username) - # can_publish = 'publish_' + publish_object['draft_id'] in all_permissions - if prefix_auth is True: - # if is_owner is True or can_publish is True: - if delete_draft is True: - objected.last_update = timezone.now() - objected.state = "PUBLISHED" - objected.owner_group = owner_group - objected.object_id = versioned["published_id"] - objected.contents["object_id"] = versioned["published_id"] - objected.save() - - # Update the request status. - returning.append( - db_utils.messages(parameters=versioned)[ - "200_OK_object_publish_draft_deleted" - ] - ) - - else: - new_object = {} - new_object["contents"] = objected.contents - new_object["object_id"] = object_id - new_object["contents"]["object_id"] = object_id - new_object["owner_group"] = owner_group - new_object["owner_user"] = objected.owner_user - new_object["prefix"] = objected.prefix - new_object["last_update"] = timezone.now() - new_object["schema"] = "IEEE" - new_object["state"] = "PUBLISHED" - - # Write to the database. - objects_written = db_utils.write_object( - p_app_label="api", - p_model_name="BCO", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=new_object, - ) - prefix_counter.n_objects = prefix_counter.n_objects + 1 - prefix_counter.save() - if objects_written < 1: - # Issue with writing out to DB - returning.append( - db_utils.messages(parameters={})["400_bad_request"] - ) - any_failed = True - else: - # Update the request status. - returning.append( - db_utils.messages(parameters=versioned)[ - "200_OK_object_publish_draft_not_deleted" - ] - ) - - # else: - # # Insufficient permissions. 
- # returning.append(db_utils.messages( - # parameters={ })['403_insufficient_permissions'] - # ) - # any_failed = True - - else: - # Update the request status. - returning.append( - db_utils.messages(parameters={"prefix": prefix})[ - "401_prefix_publish_unauthorized" - ] - ) - any_failed = True - - # published = db_utils.publish( - # owner_group=Group.objects.get( - # name=user.username - # ).name, - # owner_user = user.username, - # prefix = prefix, - # publishable = objected, - # publishable_id = object_id, - # replace_draft = delete_draft - # ) - - # # Did the publishing go well? - # if type(published) is dict: - # # Update the request status. - # returning.append(db_utils.messages( - # parameters=versioned)['200_OK_object_publish'] - # ) - - # # Lastly, if we were given the directive to delete - # # the draft on publish, process that. - - # # Does the requestor have delete permissions on - # # the object? - - if any_failed: - return Response(status=status.HTTP_207_MULTI_STATUS, data=returning) - - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_read.py b/api/scripts/method_specific/POST_api_objects_drafts_read.py deleted file mode 100755 index e68a2153..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_read.py +++ /dev/null @@ -1,121 +0,0 @@ -# BCO model -from ...models import BCO - -# For getting objects out of the database. -from ..utilities import DbUtils - -# User information -from ..utilities import UserUtils - -# Permisions for objects -from guardian.shortcuts import get_perms - -# Responses -from rest_framework import status -from rest_framework.response import Response - - -def POST_api_objects_drafts_read(incoming): - """ - Take the bulk request and read a draft object from it. - """ - - # Instantiate any necessary imports. - db = DbUtils.DbUtils() - uu = UserUtils.UserUtils() - - # The token has already been validated, - # so the user is guaranteed to exist. 
- - # Get the User object. - user = uu.user_from_request(rq=incoming) - - # Get the user's prefix permissions. - px_perms = uu.prefix_perms_for_user( - flatten=True, user_object=user, specific_permission=["view"] - ) - - # Define the bulk request. - bulk_request = incoming.data["POST_api_objects_drafts_read"] - - # Construct an array to return the objects. - returning = [] - any_failed = False - - # Since bulk_request is an array, go over each - # item in the array. - for read_object in bulk_request: - # Get the prefix for this draft. - standardized = read_object["object_id"].split("/")[-1].split("_")[0].upper() - - # Does the requestor have view permissions for - # the *prefix*? - if "view_" + standardized in px_perms: - - # The requestor has view permissions for - # the prefix, but do they have object-level - # view permissions? - - # This can be checked by seeing if the requestor - # is the object owner OR they are a user with - # object-level view permissions OR if they are in a - # group that has object-level view permissions. - - # To check these options, we need the actual object. - if BCO.objects.filter(object_id=read_object["object_id"]).exists(): - objected = BCO.objects.get(object_id=read_object["object_id"]) - - # We don't care where the view permission comes from, - # be it a User permission or a Group permission. - all_permissions = get_perms(user, objected) - - if ( - user.username == objected.owner_user.username - or "view_" + standardized in all_permissions - ): - - # Read the object. - returning.append( - db.messages( - parameters={ - "contents": objected.contents, - "object_id": read_object["object_id"], - } - )["200_OK_object_delete"] - ) - - else: - - # Insufficient permissions. - returning.append( - db.messages(parameters={})["403_insufficient_permissions"] - ) - any_failed = True - - else: - - # Couldn't find the object. 
- returning.append( - db.messages(parameters={"object_id": read_object["object_id"]})[ - "404_object_id" - ] - ) - any_failed = True - - else: - # Update the request status. - returning.append( - db.messages(parameters={"prefix": standardized})[ - "401_prefix_unauthorized" - ] - ) - any_failed = True - - # As this view is for a bulk operation, status 200 - # means that the request was successfully processed, - # but NOT necessarily each item in the request. - # For example, a table may not have been found for the first - # requested draft. - if any_failed: - return Response(status=status.HTTP_300_MULTIPLE_CHOICES, data=returning) - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_drafts_token.py b/api/scripts/method_specific/POST_api_objects_drafts_token.py deleted file mode 100755 index 43438e2e..00000000 --- a/api/scripts/method_specific/POST_api_objects_drafts_token.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python3 -"""Retrieve Draft From Token - - -""" - -import re - -# Concatenating QuerySets -from itertools import chain -from typing import Optional, Tuple -from api.models import BCO -from api.scripts.utilities import UserUtils - -# Object-level permissions -from guardian.shortcuts import get_objects_for_user - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# Below is helper code to deal with how we are allowing non standard -# versions (i.e. 1.2 instead of 1.2.0, etc). - -import semver -from semver import VersionInfo as Version - -BASEVERSION = re.compile( - r"""[vV]? - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - )? - )? - """, - re.VERBOSE, -) - - -def coerce(version: str) -> Tuple[Version, Optional[str]]: - """ - Convert an incomplete version string into a semver-compatible Version - object - - * Tries to detect a "basic" version string (``major.minor.patch``). 
- * If not enough components can be found, missing components are - set to zero to obtain a valid semver version. - - Parameters - ---------- - version: str - the version string to convert - - Returns - ------- - tuple(:class:`Version` | None, str) - a tuple with a :class:`Version` instance (or ``None`` - if it's not a version) and the rest of the string which doesn't - belong to a basic version. - """ - - match = BASEVERSION.search(version) - if not match: - return (None, version) - - ver = { - key: 0 if value is None else value for key, value in match.groupdict().items() - } - ver = Version(**ver) - rest = match.string[match.end() :] # noqa:E203 - return ver, rest - - -def POST_api_objects_drafts_token(rqst, internal=False): - """ - Get all DRAFT objects for a token. - - Parameters - ---------- - rqst: rest_framework.request.Request - Django request object. - internal: bool - denotes if the call being made to this handler internally - - Returns - ------- - - """ - - # The token has already been validated, - # so the user is guaranteed to exist. - - # Get the user's info. - # Instantiate UserUtils. - user_utils = UserUtils.UserUtils() - - # Get the user object. - user_info = user_utils.user_from_request(request=rqst) - # Any object that a user has access to - # in any way counts as an "object". - # That is, any permission counts as - # a "view" permission... - - # However, the prefix permissions must - # be in place for the user to view - # anything. Recall that prefix - # permissions override any object-level - # permissions. - - # We can't just use a straight filter here - # because we have to use two different - # models (the prefix permissions on the - # one hand and the BCO objects on the other). - - # First, get all prefixes available to the - # user. - user_prefixes = user_utils.prefixes_for_user(user_object=user_info) - - # Now get any object where the user has an - # object-level permission. - - # Use an empty list of perms to get ANY perm. 
- # Source: https://stackoverflow.com/a/24980558 - user_objects = get_objects_for_user( - user=user_info, perms=[], klass=BCO, any_perm=True - ) - - # Now get all objects under these prefixes. - prefix_objects = BCO.objects.filter(prefix__in=user_prefixes, state="DRAFT") - - # Assume all the values are supposed to be returned. - # Source: https://stackoverflow.com/a/51733590 - return_values = [ - "contents", - "last_update", - "object_class", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ] - - # If there are any valid keys in the request, - # use them to narrow down the fields. - - # Redundant logic here since the schema check - # would catch this... - if "fields" in rqst.data["POST_api_objects_drafts_token"]: - - # Take the fields and find their intersection with - # the available fields. - # Source: https://stackoverflow.com/a/3697438 - common_fields = list( - set(rqst.data["POST_api_objects_drafts_token"]["fields"]) - & set(return_values) - ) - - if len(common_fields) > 0: - return_values = common_fields - - # Return based on whether or not we're using an internal - # call. - if not internal: - print( - " Not Internal, user response: {}".format( - user_objects.intersection(prefix_objects).values(*return_values) - ) - ) - # Get the user's DRAFT objects. - return Response( - data=user_objects.intersection(prefix_objects).values(*return_values), - status=status.HTTP_200_OK, - ) - - elif internal: - # Concatenate the QuerySets. - # Source: https://stackoverflow.com/a/434755 - - # Get the user's DRAFT objects AND - # add in the published objects. - # TODO: This needs to only return the most recent PUBLISHED objects not all of the versions - - published = BCO.objects.filter(state="PUBLISHED").values() - # unique_published = [] - unique_published = set() - - # E.g. 
- # published[0]["contents"]["object_id"] = 'http://127.0.0.1:8000/BCO_000010/1.0' - - bcos = {} - for pub in published: - # TODO: We should move this out of a try except and try to handle various situations, - # this is currently assuming that the format is - # http://URL:PORT/BCO ACCESSION/BCO VERSION - this may not always be true - try: - bco_url, bco_id_accession, bco_id_version = pub["object_id"].rsplit( - "/", 2 - ) - bco_id_name = bco_url + "/" + bco_id_accession - except Exception as error: - print( - "Biocompute Name, Version, and URL not formatted as expected: {}".format( - error - ) - ) - return Response(status=status.HTTP_400_BAD_REQUEST) - if bco_id_name in bcos: - # Other version of this BCO object exists - current_version = bcos[bco_id_name]["bco_version"] - # if semver.compare(bco_id_version, current_version, key=coerce): - # # New one is newer version, set: - if float(current_version) < float(bco_id_version): - bcos[bco_id_name] = { - "bco_name": bco_id_name, - "bco_version": current_version, - "bco_object": pub, - } - else: - pass - else: - # Not in dictionary yet - bcos[bco_id_name] = { - "bco_name": bco_id_name, - "bco_version": bco_id_version, - "bco_object": pub, - } - for key, value in bcos.items(): - unique_published.add(value["bco_object"]["id"]) - unique_published = bco.objects.filter(id__in=unique_published) - result_list = chain( - unique_published.values(*return_values), - prefix_objects.values(*return_values), - ) - return Response(data=result_list, status=status.HTTP_200_OK) diff --git a/api/scripts/method_specific/POST_api_objects_publish.py b/api/scripts/method_specific/POST_api_objects_publish.py deleted file mode 100755 index ed5e5a0b..00000000 --- a/api/scripts/method_specific/POST_api_objects_publish.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 -"""Bulk Publish - --------------------- -Take the bulk request and publish objects directly. 
-""" - -from api.models import BCO -from api.model.prefix import prefix_table, Prefix -from api.scripts.utilities.DbUtils import DbUtils as db_utils -from api.scripts.utilities.UserUtils import UserUtils as user_utils -from api.scripts.utilities.JsonUtils import parse_bco -from django.conf import settings -from django.utils import timezone -from rest_framework import status -from rest_framework.response import Response - - -def post_api_objects_publish(incoming): - """ - Take the bulk request and publish objects directly. - """ - - root_uri = settings.OBJECT_NAMING["root_uri"] - user = user_utils().user_from_request(request=incoming) - px_perms = user_utils().prefix_perms_for_user(flatten=True, user_object=user) - bulk_request = incoming.data["POST_api_objects_publish"] - returning = [] - any_failed = False - results = {} - for publish_object in bulk_request: - try: - results = parse_bco(publish_object["contents"], results) - except KeyError as error: - returning.append( - db_utils().messages(parameters={"errors": str(error)})[ - "400_non_publishable_object" - ] - ) - any_failed = True - continue - object_key = publish_object["contents"]["object_id"] - if results[object_key]["number_of_errors"] > 0: - returning.append( - db_utils().messages(parameters={"errors": results})[ - "400_non_publishable_object" - ] - ) - any_failed = True - continue - - prefix = publish_object["prefix"].upper() - if Prefix.objects.filter(prefix=prefix).exists(): - prefix_counter = prefix_table.objects.get(prefix=prefix) - - if "publish_" + prefix in px_perms: - if "object_id" in publish_object: - accession = publish_object["object_id"].split("/")[-2] - version = publish_object["object_id"].split("/")[-1] - object_num = int( - publish_object["object_id"].split("_")[1].split("/")[0] - ) - constructed_obj_id = ( - root_uri - + "/" - + accession - + "/" - + publish_object["contents"]["provenance_domain"]["version"] - ) - if 
BCO.objects.filter(object_id__contains=accession+'/'+version).exists(): - # import pdb; pdb.set_trace() - returning.append( - db_utils().messages(parameters={"object_id": accession+'/'+version})[ - "409_object_conflict" - ] - ) - any_failed = True - continue - if publish_object["object_id"] != constructed_obj_id: - returning.append( - db_utils().messages( - parameters={ - "object_id": publish_object["object_id"], - "constructed_obj_id": constructed_obj_id, - } - )["409_object_id_conflict"] - ) - any_failed = True - continue - new_object = {} - new_object["contents"] = publish_object["contents"] - new_object["object_id"] = constructed_obj_id - new_object["contents"]["object_id"] = constructed_obj_id - new_object["owner_group"] = publish_object["owner_group"] - new_object["owner_user"] = user.username - new_object["prefix"] = prefix - new_object["last_update"] = timezone.now() - new_object["schema"] = "IEEE" - new_object["state"] = "PUBLISHED" - - objects_written = db_utils().write_object( - p_app_label="api", - p_model_name="BCO", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=new_object, - ) - if prefix_counter.n_objects < object_num: - prefix_counter.n_objects = object_num + 1 - prefix_counter.save() - returning.append( - db_utils().messages( - parameters={"object_id": constructed_obj_id} - )["201_create"] - ) - else: - object_num = format(prefix_counter.n_objects, "06d") - version = publish_object["contents"]["provenance_domain"]["version"] - constructed_obj_id = ( - root_uri + "/" + prefix + "_" + object_num + "/" + version - ) - - new_object = {} - new_object["contents"] = publish_object["contents"] - new_object["object_id"] = constructed_obj_id - new_object["contents"]["object_id"] = constructed_obj_id - new_object["owner_group"] = publish_object["owner_group"] - new_object["owner_user"] = user.username - new_object["prefix"] = prefix - new_object["last_update"] = 
timezone.now() - new_object["schema"] = "IEEE" - new_object["state"] = "PUBLISHED" - - objects_written = db_utils().write_object( - p_app_label="api", - p_model_name="BCO", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=new_object, - ) - - prefix_counter.n_objects = prefix_counter.n_objects + 1 - prefix_counter.save() - returning.append( - db_utils().messages( - parameters={"object_id": constructed_obj_id} - )["201_create"] - ) - - else: - returning.append( - db_utils().messages(parameters={"prefix": prefix})[ - "401_prefix_unauthorized" - ] - ) - any_failed = True - - else: - returning.append( - db_utils().messages(parameters={"prefix": prefix})["404_missing_prefix"] - ) - any_failed = True - - if any_failed: - return Response(status=status.HTTP_207_MULTI_STATUS, data=returning) - - return Response(status=status.HTTP_200_OK, data=returning) diff --git a/api/scripts/method_specific/POST_api_objects_published.py b/api/scripts/method_specific/POST_api_objects_published.py deleted file mode 100644 index 3e9e8f30..00000000 --- a/api/scripts/method_specific/POST_api_objects_published.py +++ /dev/null @@ -1,115 +0,0 @@ -# BCOs -from ...models import BCO - -# User information -from ..utilities import UserUtils - -# Object-level permissions -from guardian.shortcuts import get_objects_for_user - -# Concatenating QuerySets -from itertools import chain - -# Responses -from rest_framework import status -from rest_framework.response import Response - -# Below is helper code to deal with how we are allowing non standard versions (i.e. 1.2 instead of 1.2.0, etc). -import re -import semver -from semver import VersionInfo as Version -from typing import Optional, Tuple - -# TODO: This is repeated code, should consolidate -BASEVERSION = re.compile( - r"""[vV]? - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - (\. - (?P0|[1-9]\d*) - )? - )? 
- """, - re.VERBOSE, -) - - -def coerce(version: str) -> Tuple[Version, Optional[str]]: - """ - Convert an incomplete version string into a semver-compatible Version - object - * Tries to detect a "basic" version string (``major.minor.patch``). - * If not enough components can be found, missing components are - set to zero to obtain a valid semver version. - :param str version: the version string to convert - :return: a tuple with a :class:`Version` instance (or ``None`` - if it's not a version) and the rest of the string which doesn't - belong to a basic version. - :rtype: tuple(:class:`Version` | None, str) - """ - match = BASEVERSION.search(version) - if not match: - return (None, version) - - ver = { - key: 0 if value is None else value for key, value in match.groupdict().items() - } - ver = Version(**ver) - rest = match.string[match.end() :] # noqa:E203 - return ver, rest - - -def POST_api_objects_published(): - """ - Get All published objects (publicly available) - """ - - published = BCO.objects.filter(state="PUBLISHED").values() - unique_published = [] - - # E.g. 
- # published[0]["contents"]["object_id"] = 'http://127.0.0.1:8000/BCO_000010/1.0' - - bcos = {} - for p in published: - # TODO: We should move this out of a try except and try to handle various situations, this is currently - # assuming that the format is http://URL:PORT/BCO NAME/BCO VERSION - this may not always be true - try: - bco_url, bco_id_name, bco_id_version = p["contents"]["object_id"].rsplit( - "/", 2 - ) - except Exception as e: - print( - "Biocompute Name, Version, and URL not formatted as expected: {}".format( - e - ) - ) - return Response(status=status.HTTP_400_BAD_REQUEST) - - if bco_url in bcos: - # Other version of this BCO object exists - current_version = bcos[bco_url]["bco_version"] - - if semver.compare(bco_id_version, current_version, key=coerce): - # New one is newer version, set: - bcos[bco_url] = { - "bco_name": bco_id_name, - "bco_version": bco_id_version, - "bco_object": p, - } - - else: - # Do nothing - pass - else: - # Not in dictionary yet - bcos[bco_url] = { - "bco_name": bco_id_name, - "bco_version": bco_id_version, - "bco_object": p, - } - for key, value in bcos.items(): - unique_published.append(value["bco_object"]) - - return Response(data=unique_published, status=status.HTTP_200_OK) diff --git a/api/scripts/method_specific/POST_api_objects_search.py b/api/scripts/method_specific/POST_api_objects_search.py deleted file mode 100755 index 6b3cfe27..00000000 --- a/api/scripts/method_specific/POST_api_objects_search.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python3 -"""BCO Search - -""" - -from itertools import chain - -from api.models import BCO -from api.model.prefix import Prefix -from api.scripts.utilities import UserUtils -from guardian.shortcuts import get_objects_for_user -from rest_framework import status -from rest_framework.response import Response - - -def post_api_objects_search(request): - """Search for BCOs - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. 
- - Returns - ------- - List of BCOs that met search criteria - - """ - - return_values = [ - "contents", - "last_update", - "object_class", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ] - - query = request.data["POST_api_objects_search"][0] - search_type = query["type"] - try: - search_value = query["search"] - except KeyError: - search_value = "" - user_utils = UserUtils.UserUtils() - user_info = request._user - user_prefixes = user_utils.prefixes_for_user(user_object=user_info) - - prefix_perms = user_utils.prefix_perms_for_user( - flatten=True, user_object=user_info, specific_permission=["add"] - ) - - if search_type == "bco_id": - publish_list = BCO.objects.filter( - object_id__icontains=search_value, state="PUBLISHED" - ) - if user_info.username == "anon": - result_list = chain(publish_list.values(*return_values)) - else: - user_objects = get_objects_for_user( - user=user_info, perms=[], klass=BCO, any_perm=True - ) - draft_list = BCO.objects.filter( - object_id__icontains=search_value, - prefix__in=user_prefixes, - state="DRAFT", - ).exclude(state="DELETE") - bco_list = draft_list.union(publish_list) - result_list = chain(bco_list.values(*return_values)) - - if search_type == "prefix": - search_value = search_value.upper() - try: - prefix = Prefix.objects.get(prefix=search_value).prefix - - except Prefix.DoesNotExist: - return Response( - status=status.HTTP_404_NOT_FOUND, - data={ - "request_status": "FAILURE", - "status_code": "404", - "message": "That prefix was not found on this server.", - }, - ) - - if prefix in user_prefixes: - bco_list = ( - BCO.objects.filter(prefix=prefix).values().exclude(state="DELETE") - ) - result_list = chain(bco_list.values(*return_values)) - - else: - return Response( - status=status.HTTP_403_FORBIDDEN, - data={ - "request_status": "FAILURE", - "status_code": "403", - "message": "The token provided does not have sufficient" - " permissions for the requested prefix.", - }, - ) - - if 
search_type == "mine": - if user_info.username == "anon": - result_list = chain( - BCO.objects.filter(state="PUBLISHED").values(*return_values) - ) - - else: - result_list = chain( - BCO.objects.filter(owner_user=user_info) - .exclude(state="DELETE") - .values(*return_values) - ) - # print(len(list(result_list))) - - return Response(status=status.HTTP_200_OK, data=result_list) diff --git a/api/scripts/method_specific/POST_api_objects_token.py b/api/scripts/method_specific/POST_api_objects_token.py deleted file mode 100755 index a0a8a33e..00000000 --- a/api/scripts/method_specific/POST_api_objects_token.py +++ /dev/null @@ -1,25 +0,0 @@ -# Draft objects -from .POST_api_objects_drafts_token import POST_api_objects_drafts_token - -# Responses -from rest_framework import status -from rest_framework.response import Response - - -def POST_api_objects_token(rqst): - """ - Get all objects for a token. - - The token has already been validated, - so the user is guaranteed to exist. - - Make the internal call, but change - the request key so that we can re-use - POST_api_objects_draft_token, and mark the internal - flag as True so that we can get published - objects. - """ - rqst.data["POST_api_objects_drafts_token"] = rqst.data.pop("POST_api_objects_token") - - # Get the user's objects. - return POST_api_objects_drafts_token(rqst=rqst, internal=True) diff --git a/api/scripts/method_specific/POST_validate_payload_against_schema.py b/api/scripts/method_specific/POST_validate_payload_against_schema.py deleted file mode 100755 index a9171541..00000000 --- a/api/scripts/method_specific/POST_validate_payload_against_schema.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -"""Bulk Validate BioCompute Objects -""" - -from rest_framework import status -from rest_framework.response import Response -from api.scripts.utilities.JsonUtils import parse_bco - - -def post_validate_bco(request): - """Bulk BCO Validation - - Take the bulk request and validate each BCO. 
- - Parameters - ---------- - request : rest_framework.request.Request - The bulk request object. - - Returns - ------- - Response : dict - A rest framework response object. The response data is a list of - dictionaries, each of which corisponding to one of the BCOs submitted - for validation. - """ - - bco_list = request.data["POST_validate_bco"] - - results = {} - any_failed = False - - for bco in bco_list: - try: - results = parse_bco(bco, results) - - if bco["object_id"] == '': - identifier = bco_list.index(bco) - results[identifier] = results[''] - del results[''] - else: - identifier = bco["object_id"] - - if results[identifier]["number_of_errors"] == 0: - results[identifier]["error_detail"] = ["BCO Valid"] - else: - any_failed = True - - except Exception as error: - results[bco_list.index(bco)] = { - "number_of_errors": 1, - "error_detail": ["Bad request. BCO is not formatted correctly."] - } - any_failed = True - - if any_failed is True: - return Response(status=status.HTTP_207_MULTI_STATUS, data=results) - - return Response(status=status.HTTP_200_OK, data=results) diff --git a/api/scripts/utilities/DbUtils.py b/api/scripts/utilities/DbUtils.py deleted file mode 100755 index 398e86e1..00000000 --- a/api/scripts/utilities/DbUtils.py +++ /dev/null @@ -1,986 +0,0 @@ -#!/usr/bin/env python3 -"""DB Utilities -Functions for operations with DB -""" - -import random -import re -import uuid -import json -import datetime -import requests -from api.models import BCO -from api.serializers import getGenericSerializer -from api.scripts.utilities import UserUtils -from django.apps import apps -from django.conf import settings -from django.contrib.auth.models import Group, User -from django.contrib.contenttypes.models import ContentType -from django.utils import timezone - - -class DbUtils: - """Class Description - ----------------- - These methods are for interacting with our sqlite database. - Checking whether or not an object exists. 
- """ - - def check_object_id_exists(self, p_app_label, p_model_name, p_object_id): - """Simple existence check. - Source: https://stackoverflow.com/a/9089028 - Source: https://docs.djangoproject.com/en/3.1/ref/models/querysets/#exists - """ - - if ( - apps.get_model(app_label=p_app_label, model_name=p_model_name) - .objects.filter(object_id=p_object_id) - .exists() - ): - return None - else: - return 1 - - # Checking whether or not a user exists. - def check_user_exists(self, p_app_label, p_model_name, p_email): - """Simple existence check. - Source: https://stackoverflow.com/a/9089028 - Source: https://docs.djangoproject.com/en/3.1/ref/models/querysets/#exists - """ - - if ( - apps.get_model(app_label=p_app_label, model_name=p_model_name) - .objects.filter(email=p_email) - .exists() - ): - - return 1 - - else: - - return None - - # Check version rules - def check_version_rules(self, published_id): - """BCO Version Check - Potentially publishing a new version - of a published object, but we have to check to - see if the provided URI exists in the publishing table. - - We can take the exact version of the object ID OR - only the root version. For example, - 'http://hostname/some/other/paths/BCO_5' and - 'http://hostname/some/other/paths/BCO_5/3.4' would invoke the same - logic here, assuming that version 3.4 of BCO_5 is the latest version. - """ - - # Does the provided object ID exist? - if BCO.objects.filter(object_id=published_id).exists(): - - split_up = published_id.split("/") - # Get the version. - version = split_up[-1:][0] - if version == "DRAFT": - split_up[len(split_up) - 1] = "1.0" - return {"published_id": "/".join(split_up)} - - else: - # Increment the minor version. - incremented = version.split(".") - incremented[1] = int(incremented[1]) + 1 - incremented = incremented[0] + "." + str(incremented[1]) - - # Create the object ID. - split_up[len(split_up) - 1] = incremented - - # Kick back the minor-incremented object ID. 
- return {"published_id": "/".join(split_up)} - - else: - - # If the EXACT object ID wasn't found, then - # the user may have provided either a root version - # of the URI or a version of the same root URI. - - # If the provided version is larger - # than the version that would be generated automatically, - # then that provided version is used. - - # First determine whether or not the provided URI - # only has the root or has the root and the version. - - # Should do this by using settings.py root_uri - # information... - - # Split up the URI into the root ID and the version. - root_uri = "" - version = "" - - if re.match(r"(.*?)/[A-Z]+_(\d+)$", published_id): - - # Only the root ID was passed. - root_uri = published_id - - elif re.match(r"(.*?)/[A-Z]+_(\d+)/(\d+)\.(\d+)$", published_id): - - # The root ID and the version were passed. - split_up = published_id.split("/") - - root_uri = "/".join(split_up[:-1]) - - version = split_up[-1:] - - # See if the root ID even exists. - - # Note the trailing slash in the regex search to prevent - # sub-string matches (e.g. http://127.0.0.1:8000/BCO_5 and - # http://127.0.0.1:8000/BCO_53 would both match the regex - # http://127.0.0.1:8000/BCO_5 if we did not have the trailing - # slash). - all_versions = list( - BCO.objects.filter( - object_id__regex=rf"{root_uri}/", state="PUBLISHED" - ).values_list("object_id", flat=True) - ) - - # Get the latest version for this object if we have any. - if len(all_versions) > 0: - - # There was at least one version of the root ID, - # so now perform some logic based on whether or - # not a version was also passed. - - # First find the latest version of the object. 
- latest_major = 0 - latest_minor = 0 - - latest_version = [i.split("/")[-1:][0] for i in all_versions] - - for i in latest_version: - - major_minor_split = i.split(".") - - if int(major_minor_split[0]) >= latest_major: - if int(major_minor_split[1]) >= latest_minor: - latest_major = int(major_minor_split[0]) - latest_minor = int(major_minor_split[1]) - - # The version provided may fail, so create a flag to - # track this. - failed_version = False - - # If the root ID and the version were passed, check - # to see if the version given is greater than that which would - # be generated automatically. - if version != "": - - # We already have the automatically generated version - # number. Now we just need to compare it with the - # number that was provided. - if ( - int(version[0].split(".")[0]) - > latest_major & int(version[0].split(".")[1]) - > latest_minor - ): - - latest_major = int(version[0].split(".")[0]) - latest_minor = int(version[0].split(".")[1]) - - # Write with the version provided. - published_id = ( - published_id - + "/" - + str(latest_major) - + "." - + str(latest_minor) - ) - - else: - - # Bad version provided. - failed_version = True - - else: - - # If only the root ID was passed, find the latest - # version in the database, then increment the version. - - # Write with the minor version incremented. - published_id = ( - published_id - + "/" - + str(latest_major) - + "." - + str(latest_minor + 1) - ) - - # Did everything go properly with the version provided? - if failed_version is False: - - # The version was valid. - return {"published_id": published_id} - - else: - - # Bad request. - return "bad_version_number" - - else: - - # If all_versions has 0 length, then the - # the root ID does not exist at all. - # In this case, we have to return a failure flag - # because we cannot create a version for - # a root ID that does not exist. 
- return "non_root_id" - - def check_activation_credentials( - self, p_app_label, p_model_name, p_email, p_temp_identifier - ) -> bool: - """ - Simple existence check. - Checking whether or not a user exists and their - temp identifier matches. - Source: https://stackoverflow.com/a/9089028 - Source: https://docs.djangoproject.com/en/3.1/ref/models/querysets/#exists - """ - - user_info = apps.get_model( - app_label=p_app_label, model_name=p_model_name - ).objects.filter(email=p_email, temp_identifier=p_temp_identifier) - - if user_info.exists(): - - # The credentials exist, but is the request timely? - # Source: https://stackoverflow.com/a/7503368 - - # Take the time and add 2 days. - time_check = list(user_info.values_list("created", flat=True))[0] - - time_check = time_check + datetime.timedelta(hours=48) - - # Crappy timezone problems. - # Source: https://stackoverflow.com/a/25662061 - - # Is the time now less than the time check? - if datetime.datetime.now(datetime.timezone.utc) < time_check: - - # We can return that this user is OK to be activated. - return True - - else: - - # The time stamp has expired, so delete - # the entry in new_users. - user_info.delete() - - # We can't activate this user. - return False - - else: - - return False - - # Check that expiration dates are valid. - def check_expiration(self, dt_string): - """Split the string first.""" - try: - split_up = dt_string.split("-") - - if len(split_up) == 6: - - try: - - # Convert everything to integers. - split_up = [int(x) for x in split_up] - - exp_date = datetime.datetime( - split_up[0], - split_up[1], - split_up[2], - split_up[3], - split_up[4], - split_up[5], - ) - - if exp_date <= datetime.datetime.now(): - - return False - - except TypeError: - - return False - - else: - - return False - - except AttributeError: - - return False - - def get_api_models(self): - """Get all the ACCESSIBLE models in the API. 
- Source: https://stackoverflow.com/a/9407979 - """ - - api_models = [] - - # Define any tables to exclude here. - exclude = ["meta", "new_users"] - - for ct in ContentType.objects.all(): - m = ct.model_class() - - if m.__module__ == "api.models": - if m.__name__ not in exclude: - api_models.append(m.__name__) - - # Returns flat list... - return api_models - - def activate_account(self, p_email): - """p_email: which e-mail to activate. - - Activation means creating an entry in User. - - To comply with GDPR, we can't keep an e-mail - directly. So, split off the username part - of the e-mail and assign a random number. - """ - - valid_username = False - - while not valid_username: - new_username = p_email - if User.objects.filter(username=new_username): - valid_username = False - else: - valid_username = True - # We can't use the generic serializer here because of how - # django processes passwords. - # Source: https://docs.djangoproject.com/en/3.2/topics/auth/default/#changing-passwords - - # The password is also randomly generated. - new_password = uuid.uuid4().hex - - # Save the user. - # Source: https://docs.djangoproject.com/en/3.2/topics/auth/default/#creating-users - - user = User.objects.create_user(new_username) - - # Setting the password has to be done manually in - # order to encrypt it. - # Source: https://stackoverflow.com/a/39211961 - # Source: https://stackoverflow.com/questions/28347200/django-rest-http-400-error-on-getting-token-authentication-view - user.set_password(new_password) - - # Save the user. - user.save() - - # Automatically add the user to the bco_drafter and bco_publisher groups. - user.groups.add(Group.objects.get(name="bco_drafter")) - user.groups.add(Group.objects.get(name="bco_publisher")) - - # (OPTIONAL) Make a request to userdb on the portal so that - # the user's information can be stored there. - - # If a token was provided with the initial request, - # use it to make the update call to userdb. 
- token = ( - apps.get_model(app_label="api", model_name="new_users") - .objects.get(email=p_email) - .token - ) - - if token is not None: - # Send the new information to userdb. - # Get the user's information from the database. - uu = UserUtils.UserUtils() - # Set the headers. - # Source: https://docs.python-requests.org/en/master/user/quickstart/#custom-headers - headers = { - "Authorization": "JWT " + token, - "Content-type": "application/json; charset=UTF-8", - } - - # Set the data properly. - # Source: https://stackoverflow.com/a/56562567 - r = requests.post( - data=json.dumps(uu.get_user_info(username=new_username), default=str), - headers=headers, - url="http://127.0.0.1:8080/users/add_api/", - ) - - # Delete the record in the temporary table. - apps.get_model(app_label="api", model_name="new_users").objects.filter( - email=p_email - ).delete() - - # Return the username in a list, as this is - # easily checked for upstream (as opposed to - # some regex solution to check for username - # information). - return [new_username] - - # Messages associated with results from sub-requests. - def messages(self, parameters, p_content=False): - """TODO: abstract all of this up into the top level of the class. - - Define the return messages, if they don't - come in defined. 
- """ - - definable = [ - "errors", - "expiration_date", - "group", - "object_id", - "draft_object_id", - "constructed_obj_id", - "object_perms", - "prefix", - "published_id", - "table", - "username", - "contents", - "users_excluded", - ] - - for i in definable: - if i not in parameters: - parameters[i] = "" - - return { - "200_found": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The object with ID '" - + parameters["object_id"] - + "' was found on table '" - + parameters["table"] - + "'.", - "content": p_content, - }, - "200_OK_group_delete": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The group '" + parameters["group"] + "' was deleted.", - }, - "200_OK_group_modify": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The group '" - + parameters["group"] - + "' was succesfully modified.", - }, - "200_OK_object_delete": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The object with ID '" - + parameters["object_id"] - + "' was deleted.", - }, - "200_OK_object_read": { - "request_status": "SUCCESS", - "status_code": "200", - "contents": parameters["contents"], - "message": "The object with ID '" - + parameters["object_id"] - + "' was found on the server.", - }, - "200_OK": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The prefix '" + parameters["prefix"] + "' was deleted.", - }, - "200_OK_object_permissions": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Permissions for the object with ID '" - + parameters["object_id"] - + "' were found on the server.", - "object_id": parameters["object_id"], - "permissions": parameters["object_perms"], - }, - "200_OK_object_permissions_set": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Permissions for the object with ID '" - + parameters["object_id"] - + "' were set on the server.", - "object_id": parameters["object_id"], - }, - "200_OK_object_publish": { - 
"request_status": "SUCCESS", - "status_code": "200", - "message": "Successfully published '" - + parameters["published_id"] - + "' on the server.", - "published_id": parameters["published_id"], - }, - "200_OK_object_publish_draft_deleted": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Successfully published '" - + parameters["published_id"] - + "' on the server and the draft was deleted.", - "published_id": parameters["published_id"], - }, - "200_OK_object_publish_draft_not_deleted": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Successfully published '" - + parameters["published_id"] - + "' on the server and the draft was not deleted.", - "published_id": parameters["published_id"], - }, - "200_OK_prefix_delete": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Successfully deleted prefix '" - + parameters["prefix"] - + "'.", - }, - "200_OK_prefix_permissions_update": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "Successfully updated prefix permissions on prefix '" - + parameters["prefix"] - + "'.", - }, - "200_update": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The object with ID '" - + parameters["object_id"] - + "' was updated.", - }, - "201_create": { - "request_status": "SUCCESS", - "status_code": "201", - "message": "The object with ID '" - + parameters["object_id"] - + "' was created on the server.", - "object_id": parameters["object_id"], - }, - "201_prefix_modify": { - "request_status": "SUCCESS", - "status_code": "200", - "message": "The prefix '" + parameters["prefix"] + "' was updated.", - }, - "201_group_create": { - "request_status": "SUCCESS", - "status_code": "201", - "message": "The group '" - + parameters["group"] - + "' was successfully created.", - }, - "201_group_users_excluded": { - "request_status": "SUCCESS", - "status_code": "201", - "message": "The group '" - + parameters["group"] - + "' was successfully created, but 
the following users were excluded: " - + str(parameters["users_excluded"]), - }, - "201_prefix_create": { - "request_status": "SUCCESS", - "status_code": "201", - "message": "The prefix '" - + parameters["prefix"] - + "' was successfully created.", - }, - "202_Accepted": { - "request_status": "SUCCESS", - "status_code": "202", - "message": "The request you performed has been accepted.", - }, - "204_no_content": { - "request_status": "SUCCESS", - "status_code": "204", - "message": "The search you performed returned ZERO results.", - }, - "400_bad_request": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The request could not be processed with the parameters provided.", - }, - "400_bad_request_malformed_prefix": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The prefix '" - + parameters["prefix"] - + "' does not follow the naming rules for a prefix.", - }, - "400_bad_version_number": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The provided version number for this object is not greater than the number that would be generated automatically and therefore the request to publish was denied.", - }, - "400_invalid_expiration_date": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The expiration date '" - + parameters["expiration_date"] - + "' is not valid either because it does not match the required format 'YYYY-MM-DD-HH-MM-SS' or because it falls before the current time.", - }, - "400_non_publishable_object": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The object provided was not valid against the schema provided. 
See key 'errors' for specifics of the non-compliance.", - "errors": parameters["errors"], - }, - "400_non_root_id": { - "request_status": "FAILURE", - "status_code": "400", - "message": "The provided object ID does not contain a URI with a valid prefix.", - }, - "400_unspecified_error": { - "request_status": "FAILURE", - "status_code": "400", - "message": "An unspecified error occurred.", - }, - "401_prefix_unauthorized": { - "request_status": "FAILURE", - "status_code": "401", - "message": "The token provided does not have draft permissions for this prefix '" - + parameters["prefix"] - + "'.", - }, - "401_prefix_publish_unauthorized": { - "request_status": "FAILURE", - "status_code": "401", - "message": "The token provided does not have publish permissions for this prefix '" - + parameters["prefix"] - + "'.", - }, - "403_insufficient_permissions": { - "request_status": "FAILURE", - "status_code": "403", - "message": "The token provided does not have sufficient permissions for the requested object.", - }, - "403_requestor_is_not_prefix_owner": { - "request_status": "FAILURE", - "status_code": "403", - "message": "The token provided is not the owner of the prefix '" - + parameters["prefix"] - + "' and therefore permissions for the prefix cannot be changed in this request.", - }, - "403_invalid_token": { - "request_status": "FAILURE", - "status_code": "403", - "message": "The token provided was not able to be used on this object.", - }, - "404_group_not_found": { - "request_status": "FAILURE", - "status_code": "404", - "message": "The group '" - + parameters["group"] - + "' was not found on the server.", - }, - "404_missing_bulk_parameters": { - "request_status": "FAILURE", - "status_code": "404", - "message": "One or more missing optional parameters are required for this call to have an effect.", - }, - "404_missing_prefix": { - "request_status": "FAILURE", - "status_code": "404", - "message": "The prefix '" - + parameters["prefix"] - + "' was not found on the 
server.", - }, - "404_object_id": { - "request_status": "FAILURE", - "status_code": "404", - "message": "The object ID '" - + parameters["object_id"] - + "' was not found on the server.", - }, - "404_table": { - "request_status": "FAILURE", - "status_code": "404", - "message": "The table with name '" - + parameters["table"] - + "' was not found on the server.", - }, - "404_user_not_found": { - "request_status": "FAILURE", - "status_code": "404", - "message": "The user '" - + parameters["username"] - + "' was not found on the server.", - }, - "409_group_conflict": { - "request_status": "FAILURE", - "status_code": "409", - "message": "The provided group '" - + parameters["group"] - + "' has already been created on this server.", - }, - "409_prefix_conflict": { - "request_status": "FAILURE", - "status_code": "409", - "message": "The provided prefix '" - + parameters["prefix"] - + "' has already been created on this server.", - }, - "409_object_conflict": { - "request_status": "FAILURE", - "status_code": "409", - "message": "The provided object " - + parameters["object_id"] - + " has already been created on this server." - + " If you wish to publish a new version of this BCO try" - + " to save the DRAFT with a different version number, and" - + " then resubmit.", - }, - "409_draft_object_id_conflict": { - "request_status": "FAILURE", - "status_code": "409", - "message": "The provided object_id " - + parameters["object_id"] - + " does not match the saved draft object_id " - + parameters["draft_object_id"] - + ". 
Once a draft is created you can not change the object id.", - }, - "409_object_id_conflict": { - "request_status": "FAILURE", - "status_code": "409", - "message": "The provided object_id " - + parameters["object_id"] - + " does not match the constructed object_id " - + parameters["constructed_obj_id"] - + ".", - }, - "418_too_many_deleted": { - "request_status": "FAILURE", - "status_code": "418", - "message": "Only one object was expected to be deleted, but multiple were removed.", - }, - } - - # Publish an object. - def publish(self, owner_group, owner_user, prefix, publishable, publishable_id): - """Publish BCO - - Parameters - ---------- - owner_group: str - Name of owner group - owner_user: str - Name of owner user - prfx: str - publishable: api.models.BCO - publishable_id: dict - - Returns - ------- - """ - # publishable is a draft object. - - # Define the object naming information. - object_naming_info = settings.OBJECT_NAMING - - # Define a variable to hold all information - # about the published object. - published = {} - - # A new published object or an existing one? - if publishable_id == "new": - - # TODO: put new object ID logic in its own function - # like check_version_rules()... - - # Define a variable which will hold the constructed name. - constructed_name = "" - - # This section was breaking the production/test Db. The contents of `object_naming_info` - # are modifies somewhere else before here so that this IF/ELSE is not needed and causes - # a break in the code. - - # Create the constructed name based on whether or not - # we're on a production server. 
- # if settings.PRODUCTION == 'True': - - # constructed_name = object_naming_info['uri_regex'].replace( - # 'prod_root_uri', # WTF MAAAN - # object_naming_info['prod_root_uri'] - # ) - - # elif settings.PRODUCTION == 'False': - - constructed_name = object_naming_info["uri_regex"].replace( - "root_uri", object_naming_info["root_uri"] - ) - - constructed_name = constructed_name.replace("prefix", prefix) - - # Get rid of the rest of the regex for the name. - prefix_location = constructed_name.index(prefix) - prefix_length = len(prefix) - constructed_name = constructed_name[0 : prefix_location + prefix_length] - - # Get the object number counter from meta information about the prefix. - prefix_counter = prefix_table.objects.get(prefix=prefix) - - # Create the contents field. - published["contents"] = publishable - - # Create a new ID based on the prefix counter. - published["object_id"] = ( - constructed_name - + "_" - + "{:06d}".format(prefix_counter.n_objects) - + "/1.0" - ) - - # Make sure to create the object ID field in our draft. - published["contents"]["object_id"] = published["object_id"] - - # Django wants a primary key for the Group... - published["owner_group"] = owner_group - - # Django wants a primary key for the User... - published["owner_user"] = owner_user - - # The prefix is passed through. - published["prefix"] = prefix - - # Schema is hard-coded for now... - published["schema"] = "IEEE" - - # This is PUBLISHED. - published["state"] = "PUBLISHED" - - # Set the datetime properly. - published["last_update"] = timezone.now() - - # Publish. - self.write_object( - p_app_label="api", - p_model_name="BCO", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=published, - ) - - # Update the meta information about the prefix. - prefix_counter.n_objects = prefix_counter.n_objects + 1 - prefix_counter.save() - - # Successfuly saved the object. 
- return {"published_id": published["object_id"]} - - else: - # An object ID was provided, so go straight to publishing. - - # Create the contents field. - published["contents"] = publishable.contents - - # Set the object ID. - published["object_id"] = publishable_id - - # Make sure to create the object ID field in the BCO. - published["contents"]["object_id"] = publishable_id - - # Django wants a primary key for the Group... - published["owner_group"] = owner_group - - # Django wants a primary key for the User... - published["owner_user"] = owner_user - - # The prefix is passed through. - published["prefix"] = prefix - - # Schema is hard-coded for now... - published["schema"] = "IEEE" - - # Mark the object as published. - published["state"] = "PUBLISHED" - - # Set the datetime properly. - published["last_update"] = timezone.now() - - # Publish. - self.write_object( - p_app_label="api", - p_model_name="BCO", - p_fields=[ - "contents", - "last_update", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - p_data=publishable.contents, - ) - - # Successfully saved the object. - return {"published_id": published["object_id"]} - - # Write (update) either a draft or a published object to the database. - def write_object( - self, - p_app_label, - p_model_name, - p_fields, - p_data, - p_update=False, - p_update_field=False, - ): - - """Source: https://docs.djangoproject.com/en/3.1/topics/db/queries/#topics-db-queries-update - - Serialize our data.""" - serializer = getGenericSerializer( - incoming_model=apps.get_model( - app_label=p_app_label, model_name=p_model_name - ), - incoming_fields=p_fields, - ) - - serialized = serializer(data=p_data) - - # Save (update) it. - if p_update is False: - # Write a new object. - if serialized.is_valid(): - serialized.save() - return 1 - else: - print(serialized.errors) - return -1 - else: - # Update an existing object. 
- # apps.get_model( - # app_label = p_app_label, - # model_name = p_model_name - # ).objects.filter( - # object_id = p_data['object_id'] - # ).update( - # contents = p_data['contents'] - # ) - - objects_modified = ( - apps.get_model(app_label=p_app_label, model_name=p_model_name) - .objects.filter(object_id=p_data["object_id"]) - .update(contents=p_data["contents"]) - ) - - return objects_modified - - def convert_id_form(self, oi_root): - return oi_root.split("_")[0] + "{:06d}".format(int(oi_root.split("_")[1])) diff --git a/api/scripts/utilities/FileUtils.py b/api/scripts/utilities/FileUtils.py deleted file mode 100755 index 2fbb32a1..00000000 --- a/api/scripts/utilities/FileUtils.py +++ /dev/null @@ -1,167 +0,0 @@ -# --- SOURCES --- # - -# For finding files. -import glob - -# For writing. -import os - - -# --- MAIN --- # - - -class FileUtils: - def pathalizer(self, directory, pattern): - - # Description - # ----------- - - # Construct a search path with regex. - - # Arguments - # --------- - - # directory - # --------- - # - # Description: where to look within the project directory. - # Values: any folder - - # pattern - # ------- - # - # Description: the regex. - # Values: any regex - - # Outputs - # ------- - - # A directory + pattern string. - - # Kick back the string. - return os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - directory + pattern, - ) - - def create_files(self, payload, output_directory, file_extension): - - # Description - - # Write a list of files a list of files in a directory matching a regex. - - # Arguments - - # payload - # ---------------- - # - # Description: what are we writing? - # Values: must be a dictionary where the keys are *ORIGINAL* full file names and values are file contents. - - # output_directory - # ---------------- - # - # Description: where are we writing to? 
- # Values: any extant directory - MUST BE AN ABSOLUTE PATH - - # file_extension - # ---------------- - # - # Description: what extension are we appending to the *ORIGINAL* file name? - # Values: any string - - # Outputs - - # A list of files. - - # Construct the output path for each file and write. - for original_filename, contents in payload.items(): - with open( - self.pathalizer(output_directory, original_filename + file_extension), - mode="w", - ) as f: - f.write(contents) - - def find_files(self, input_directory, regex): - - # Description - - # Retrieve a list of files in a directory matching a regex. - - # Arguments - - # input_directory - # ---------------- - # - # Description: where are the files we're assigning? - # Values: any extant directory - MUST BE AN ABSOLUTE PATH - - # regex - # ---------------- - # - # Description: what regex are we using to search the directory? - # Values: any regex - - # Outputs - - # A list of matching files. - - # Search the input directory for matching files. - - # Source: https://stackoverflow.com/questions/39293968/python-how-do-i-search-directories-and-find-files-that-match-regex - # Source: https://stackoverflow.com/questions/30218802/get-parent-of-current-directory-from-python-script - - return glob.glob(self.pathalizer(input_directory, regex)) - - # Find the entire tree of a folder based on an extension. - def get_folder_tree_by_extension(self, search_folder, search_extension): - - # search_folder: where we're looking. - # search_extension: the extension we're looking for. - - # Source: https://www.tutorialspoint.com/python/os_walk.htm - - # Set the root directory. - root_directory = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), search_folder - ) - - # Create a dictionary to return. 
- returning = {"root_directory": root_directory, "paths": []} - - for root, dirs, files in os.walk(root_directory): - for name in files: - returning["paths"].append(os.path.join(root, name)) - for name in dirs: - returning["paths"].append(os.path.join(root, name)) - - returning["paths"] = [ - x for x in returning["paths"] if x.find(search_extension) != -1 - ] - - return returning - - # Find the entire tree of a folder, regardless of extension. - def get_folder_tree(self, search_folder): - - # search_folder: where we're looking. - - # Source: https://www.tutorialspoint.com/python/os_walk.htm - - # Set the root directory. - root_directory = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), search_folder - ) - - # Create a dictionary to return. - returning = {"root_directory": root_directory, "paths": []} - - for root, dirs, files in os.walk(root_directory): - for name in files: - returning["paths"].append(os.path.join(root, name)) - for name in dirs: - returning["paths"].append(os.path.join(root, name)) - - returning["paths"] = [x for x in returning["paths"] if 1] - - return returning diff --git a/api/scripts/utilities/JsonUtils.py b/api/scripts/utilities/JsonUtils.py deleted file mode 100755 index b34bac54..00000000 --- a/api/scripts/utilities/JsonUtils.py +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env python3 -"""JSON Utils - -For JSON parsing and schema validation. -""" - -import os -import sys -import json -import jsonref -import jsonschema -from simplejson.errors import JSONDecodeError -from requests.exceptions import ConnectionError as ErrorConnecting - - -def get_schema(schema_uri): - """Retrieve JSON Schema - - Parameters - ---------- - schema_uri : str - A URI that is used to pull the JSON schema for validation. - - Returns - ------- - schema : dict - A dictionary of the JSON schema definition, or detail on the error loading the schema. 
- """ - - try: - schema = jsonref.load_uri(schema_uri) - return schema - - except JSONDecodeError: - return {schema_uri: ["Failed to load extension schema. JSON Decode Error."]} - - except TypeError: - return {schema_uri: ["Failed to load extension schema. Invalid format."]} - - except ErrorConnecting: - return {schema_uri: ["Failed to load extension schema. Connection Error."]} - - -def validate(schema, json_object, results): - """BCO/extension Validator - - Parameters - ---------- - schema : dict - A dictionary of the JSON schema definition. - json_object : dict - A dictionary of the BCO/extension JSON for validation. - results : dict - A dictionary that is used to collect the validation results. - - Returns - ------- - results : dict - A dictionary that is used to collect the validation results. - """ - if "object_id" in json_object: - identifier = json_object["object_id"] - - if "extension_schema" in json_object: - identifier = json_object["extension_schema"] - - validator = jsonschema.Draft7Validator(schema) - errors = validator.iter_errors(json_object) - for error in errors: - values = "".join(f"[{v}]" for v in error.path) - results[identifier]["number_of_errors"] += 1 - if len(values) == 0: - error_string = {"top_level": error.message} - else: - error_string = {values: error.message} - results[identifier]["error_detail"].append(error_string) - - return results - - -def parse_bco(bco: dict, results: dict): - """BCO Parsing for Validation - - Parameters - ---------- - bco : dict - The BCO JSON to be processed for validation. 
- results : dict - A dictionary to be populated with the BCO validation results - - Returns - ------- - results : dict - A dictionary with the BCO validation results - """ - - identifier = bco["object_id"] - results[identifier] = {"number_of_errors": 0, "error_detail": []} - try: - spec_version = get_schema(bco["spec_version"]) - - except AttributeError: - file_path = os.path.dirname( - os.path.abspath("api/validation_definitions/IEEE/2791object.json") - ) - - ieee = "api/validation_definitions/IEEE/2791object.json" - with open(ieee, "r", encoding="utf-8") as file: - spec_version = jsonref.load( - file, base_uri=f"file://{file_path}/", jsonschema=True - ) - - except ErrorConnecting: - file_path = os.path.dirname( - os.path.abspath("api/validation_definitions/IEEE/2791object.json") - ) - - ieee = "api/validation_definitions/IEEE/2791object.json" - with open(ieee, "r", encoding="utf-8") as file: - spec_version = jsonref.load( - file, base_uri=f"file://{file_path}/", jsonschema=True - ) - - results = validate(spec_version, bco, results) - if "extension_domain" in bco.keys(): - if isinstance(bco["extension_domain"], list) is False: - results[identifier]["extension_domain"] = { - "number_of_errors": 1, - "error_detail": ["extension_doamin invalid"], - } - - return results - for extension in bco["extension_domain"]: - extension_id = extension["extension_schema"] - results[identifier][extension_id] = { - "number_of_errors": 0, - "error_detail": [], - } - extension_schema = get_schema(extension_id) - if extension_id in extension_schema: - results[identifier][extension_id] = { - "number_of_errors": 1, - "error_detail": extension_schema, - } - else: - results[identifier] = validate( - extension_schema, extension, results[identifier] - ) - if results[identifier][extension_id]["number_of_errors"] == 0: - results[identifier][extension_id]["error_detail"] = ["Extension Valid"] - - results[identifier]["number_of_errors"] += results[identifier][ - extension_id - 
]["number_of_errors"] - - return results - - -class JsonUtils: - """Class Description - ----------------- - - These are methods for checking for valid JSON objects. - """ - - # Check for a set of keys. - def check_key_set_exists(self, data_pass, key_set): - """ - Arguments - --------- - - data_pass: the 'raw' data. - - Go over each key in the key set and see if it exists - the in request data. - - Returns - ------- - - None: all keys were present - dict: items 'error' and 'associated_key' - - Assume all keys are present. - """ - missing_keys = [] - - for current_key in key_set: - - # Was this key found? - try: - - data_pass[current_key] - - except: - - # Append the error. - missing_keys.append( - { - "error": "INVALID_" + current_key.upper() + "_FAILURE", - "associated_key": current_key, - "error_message": "Key " + current_key + " not found.", - } - ) - - # Return value is based on whether or not there were errors. - if not missing_keys: - return missing_keys - - # Check that what was provided was JSON. - def check_json_exists(self, data_pass, key_set): - - # Arguments - # -------- - - # data_pass: the 'raw' request data. - # key_set: the keys to check for JSON. - - # Simply check if what was provided was actually JSON. - - # Returns - # ------- - - # None: the provided data was JSON. - # JSON_CONVERSION_ERROR: the provided data was not JSON. - - # Assume all data is JSON. - not_json = [] - - for current_key in key_set: - - # Was this key found? - try: - - # First, try to convert the payload string into a JSON object. - json.loads(s=data_pass[current_key]) - - except: - - # Append the error. - not_json.append( - {"error": "JSON_CONVERSION_ERROR", "associated_key": current_key} - ) - - # Return value is based on whether or not there were errors. - if not_json is not []: - return not_json - - def load_schema_refs(self, schema_pass): - - # Load the references for a given schema. 
- - # Arguments - # --------- - - # schema_pass: the schema for which we are loading references. - - # The jsonschema documentation doesn't give any examples. - # Source: https://www.programcreek.com/python/example/83374/jsonschema.RefResolver - - # Define the resolver. - resolver = jsonschema.RefResolver(referrer=schema_pass, base_uri="./") - - def check_object_against_schema(self, object_pass, schema_pass): - - # Check for schema compliance. - - # Arguments - # --------- - - # object_pass: the object being checked. - # schema_pass: the schema to check against. - - # Check the object against the provided schema. - - # Define a validator. - validator = jsonschema.Draft7Validator(schema_pass) - - # Define the errors list. - errors = validator.iter_errors(object_pass) - error_string = "" - - # We have to use a bit of tricky output re-direction, see https://www.kite.com/python/answers/how-to-redirect-print-output-to-a-variable-in-python - - old_stdout = sys.stdout - new_stdout = io.StringIO() - sys.stdout = new_stdout - - # We ALSO have to use a bit of tricky flagging to indicate - # that there were errors since generators can't use the normal len(list(generator)) idiom. - error_flag = 0 - - for e in errors: - - # There is at least 1 error. - error_flag = 1 - - # These aren't deleted when preparing the code for production... - print(e) - print("=================") - - error_string = error_string + new_stdout.getvalue() - sys.stdout = old_stdout - - # Return based on whether or not there were any errors. - if error_flag != 0: - - # Collapse and return the errors. - return error_string diff --git a/api/scripts/utilities/RequestUtils.py b/api/scripts/utilities/RequestUtils.py deleted file mode 100755 index be305c58..00000000 --- a/api/scripts/utilities/RequestUtils.py +++ /dev/null @@ -1,30 +0,0 @@ -# Utilities -import json -from . 
import JsonUtils - -# For checking request formats -from django.conf import settings - - -class RequestUtils: - - # Check for a valid template. - def check_request_templates(self, method, request): - - # Arguments - - # method: one of DELETE, GET, PATCH, POST - # request: the request itself - - # We need to check for a valid template. - - # Define the request templates. - request_templates = settings.REQUEST_TEMPLATES - - # Subset the templates to the ones for this request method. - request_templates = request_templates[method] - - # Validate against the templates. - return JsonUtils.JsonUtils().check_object_against_schema( - object_pass=request, schema_pass=request_templates - ) diff --git a/api/scripts/utilities/ResponseUtils.py b/api/scripts/utilities/ResponseUtils.py deleted file mode 100755 index 74b36a87..00000000 --- a/api/scripts/utilities/ResponseUtils.py +++ /dev/null @@ -1,53 +0,0 @@ -class ResponseUtils: - - # Class Description - # ----------------- - - # These are methods to help with sending back a (formatted) response. - - # Clean up the response string. - def beautify_error_set(self, errors): - - # Arguments - # --------- - - # errors: a list of lists, with each list having items in dictionary format {item_id: number, errors: {error: string, associated_key: string, error_message: string}} - - # Returns - # ------- - - # A line for each item_id and the associated errors. - - # Define a list which will be collapsed to return - # an error string. - error_string = [] - - # Go through each error set. - for item_index in range(0, len(errors)): - - # Create the error header for ID. - string_helper = ( - "Errors for item ID: " - + str(item_index) - + "\n-------------------------\n" - ) - - # Define a list of all errors which will be collapsed. - all_errors = [] - - # Now create each line of the error report. - for error_subset in errors[item_index]: - - # Append this error. 
- all_errors.append( - error_subset["error"] + ": " + error_subset["error_message"] - ) - - # Collapse the errors into new lines. - string_helper = string_helper + "\n".join(all_errors) - - # Append to the error string. - error_string.append(string_helper) - - # Collapse all errors for all items and return. - return "\n".join(error_string) diff --git a/api/scripts/utilities/SettingsUtils.py b/api/scripts/utilities/SettingsUtils.py deleted file mode 100755 index 83245feb..00000000 --- a/api/scripts/utilities/SettingsUtils.py +++ /dev/null @@ -1,146 +0,0 @@ -# Utilities -from . import FileUtils - -# For testing only. -import json -import os - -# For loading schema. -import jsonref - - -class SettingsUtils: - - # Class Description - # ----------------- - - # These are methods for initializing the program. - - # Create a dictionary to hold schema information. - def load_schema_local(self, search_parameters, mode): - - # search_parameters: dictionary of file search locations and file endings. - - # mode: loading for requests or for validation? - - # A more advanced version of this would set the schema $id based on - # where the schema resides, negating the need for manual entry of the $id. - - # Define a dictionary to hold top-level folder/file information. - schema = {} - - # Iterate over the search parameters. - for folder, extension in search_parameters.items(): - raw_files = FileUtils.FileUtils().get_folder_tree_by_extension( - search_folder=folder, search_extension=extension - ) - - # We now have the files, so load the schema. - - # First, initialize schema. - schema[folder] = {} - - # Now go over each path. - for current_file in raw_files["paths"]: - - # We can now set keys. - with open(current_file, mode="r") as f: - - schema[folder][current_file] = json.load(f) - - # Set the id. - schema[folder][current_file]["$id"] = "file:" + current_file - - # Now go through and define the absolute reference paths. 
- # We have to do this recursively as we do not know - # where we will see "$ref$. - - # The jsonschema library does NOT support relative references - # within the document, see https://json-schema.org/understanding-json-schema/structuring.html#using-id-with-ref - # Therefore, we must manually resolve the paths. This is actually - # a stronger solution, however, as it allows for referencing - # schema anywhere within the project directory as opposed to - # referencing schema within the same folder level only (as is - # the case with the relative reference examples given at the link above). - - # The schema_files are separated at the top level - # by the folders provided in search_parameters. - - # Source: https://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values - def set_refs(d, root_folder): - - # Set the keys. - if "$ref" in d: - - # If the reference is internal to the document, ignore it. - # Otherwise, define the reference. - if d["$ref"][0] != "#": - d["$ref"] = "file:" + os.getcwd() + "/" + root_folder + d["$ref"] - - for k, v in d.items(): - if isinstance(v, dict): - set_refs(v, root_folder) - - # Kick it back. - return d - - # A more advanced implementation would allow for referencing schema - # outside of the hosting folder. - - # Are we defining for requests or for validations? - - if mode == "requests": - - # Call set refs by each top-level folder. - for folder, contents in schema.items(): - schema[folder] = set_refs(schema[folder], root_folder="api/") - - elif mode == "validations": - - # Call set refs by each top-level folder. - for file, contents in schema["validation_definitions/"].items(): - - # Split the file name up to help construct the root folder. - file_name_split = file.split("/") - - # Where is the 'validation_definitions/' item? - vd_index = file_name_split.index("validation_definitions") - - # Collapse everything after this index but before the file name. 
- collapsed = ( - "/".join(file_name_split[vd_index + 1 : len(file_name_split) - 1]) - + "/" - ) - - # Set the name. - schema["validation_definitions/"][file] = set_refs( - schema["validation_definitions/"][file], - root_folder="api/validation_definitions/" + collapsed, - ) - - # Return the public-facing schema AND the processed schema? - return schema - - # Define the schema for each request type. - def define_request_schema(self, schema): - - # schema: everything found in self.load_local_schema. - - # Create a dictionary to return all the request types. - returning = {"DELETE": {}, "GET": {}, "PATCH": {}, "POST": {}} - - # Now go through the schema to locate the request information. - for k, v in schema.items(): - - # If the object title is a given request type, update returning. - if v["title"] == "DELETE": - returning["DELETE"] = v - elif v["title"] == "GET": - returning["GET"] = v - elif v["title"] == "PATCH": - returning["PATCH"] = v - elif v["title"] == "POST": - returning["POST"] = v - - # Kick it back. - return returning diff --git a/api/scripts/utilities/UserUtils.py b/api/scripts/utilities/UserUtils.py deleted file mode 100755 index 4ff5b375..00000000 --- a/api/scripts/utilities/UserUtils.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python3 -"""User Utilities -Functions for operations with Users -""" - -from django.conf import settings -from django.contrib.auth.models import Group, User -from django.contrib.auth.models import Permission -from rest_framework.authtoken.models import Token - - -class UserUtils: - """ - Methods for interacting with user information. 
- - Attributes - ---------- - - Methods - ------- - - """ - - def check_permission_exists(self, perm): - """Does the permission exist?""" - return Permission.objects.filter(codename=perm).exists() - - def check_group_exists(self, name): - """Does the user exist?""" - return Group.objects.filter(name=name).exists() - - def check_user_exists(self, user_name): - """Does the user exist?""" - return User.objects.filter(username=user_name).exists() - - def check_user_in_group(self, user_name, group_name): - """Check if a user is in a group. - - First check that the user exists. - Then check that the groups exists. - Finally, check that the user is in - the group. - - Try/except is preferred because - the query is only run one time. - """ - - try: - user = User.objects.get(username=user_name).username - try: - group = Group.objects.get(name=group_name).name - if group_name in list( - User.objects.get(username=user_name).groups.values_list( - "name", flat=True - ) - ): - return {"user": user, "group": group} - else: - return False - except Group.DoesNotExist: - return False - except User.DoesNotExist: - return False - - def check_user_owns_prefix(self, user_name, prfx): - """Check if a user owns a prefix.""" - - return Prefix.objects.filter(owner_user=user_name, prefix=prfx).exists() - - def get_user_groups_by_token(self, token): - """Takes token to give groups. - First, get the groups for this token. - This means getting the user ID for the token, - then the username.""" - - user_id = Token.objects.get(key=token).user_id - username = User.objects.get(id=user_id) - - # Get the groups for this username (at a minimum the user - # group created when the account was created should show up). - return Group.objects.filter(user=username) - - def get_user_groups_by_username(self, user_name): - """Takes usernames to give groups. - Get the groups for this username (at a minimum the user - group created when the account was created should show up). 
- """ - return Group.objects.filter(user=User.objects.get(username=user_name)) - - # Get all user information. - def get_user_info(self, username): - """Get User Info - - Arguments - --------- - - username: the username. - - Returns - ------- - - A dict with the user information. - - Slight error the the django-rest-framework documentation - as we need the user id and not the username. - Source: https://www.django-rest-framework.org/api-guide/authentication/#generating-tokens - No token creation as the user has to specifically - confirm their account before a token is created - for them. - - Get the other information for this user. - Source: https://stackoverflow.com/a/48592813 - First, get the django-native User object. - Group permissions - Get each group's permissions separately, - then append them to other_info. - Try to get the permissions for the user, - split by user and group. - Define a dictionary to hold the permissions. - First, by the user. - Keep the model and the codename. - Next, by the group. - username.get_group_permissions() sheds the group - name (a design flaw in django), so we have to - invoke some inefficient logic here. - In general, django isn't good at retaining - groups and permissions in one step. - See the first comment at https://stackoverflow.com/a/27538767 - for a partial solution. - Alternatively, in models.py, we could define - our own permissions class, but this is a bit - burdensome. - Add the group name automatically. 
- """ - user_id = User.objects.get(username=username).pk - token = Token.objects.get(user=user_id) - other_info = { - "permissions": {}, - "account_creation": "", - "account_expiration": "", - } - - user = User.objects.get(username=username) - user_perms = {"user": [], "groups": []} - - for permission in user.user_permissions.all(): - if permission.name not in user_perms["user"]: - user_perms["user"].append(permission.name) - - for group in user.groups.all(): - if group.name not in user_perms["groups"]: - user_perms["groups"].append(group.name) - for permission in Permission.objects.filter(group=group): - if permission.name not in user_perms["user"]: - user_perms["user"].append(permission.name) - - other_info["permissions"] = user_perms - - other_info["account_creation"] = user.date_joined - return { - "hostname": settings.ALLOWED_HOSTS[0], - "human_readable_hostname": settings.HUMAN_READABLE_HOSTNAME, - "public_hostname": settings.PUBLIC_HOSTNAME, - "token": token.key, - "username": user.username, - "other_info": other_info, - } - - def prefixes_for_user(self, user_object): - """Prefix for a given user. - Simple function to return prefixes - that a user has ANY permission on. - - Recall that having any permission on - a prefix automatically means viewing - permission. 
- """ - - return list(set([i.split("_")[1] for i in user_object.get_all_permissions()])) - - def prefix_perms_for_user( - self, user_object, flatten=True, specific_permission=None - ): - """Prefix permissions for a given user.""" - - if specific_permission is None: - specific_permission = [ - "add", - "change", - "delete", - "view", - "draft", - "publish", - ] - - prefixed = self.get_user_info(user_object)["other_info"]["permissions"] - permissions = [] - for pre in prefixed["user"]: - permissions.append(Permission.objects.get(name=pre).codename) - - return permissions - - # # To store flattened permissions - # flat_perms = [] - - # # We only need the permissions that are specific - # # to the bco model. - - # bco_specific = { - # 'user' : { }, - # 'groups': { } - # } - - # if 'bco' in prefixed['user']: - # if flatten: - # flat_perms = prefixed['user']['bco'] - # else: - # bco_specific['user']['bco'] = prefixed['user']['bco'] - # else: - # if not flatten: - # bco_specific['user']['bco'] = { } - - # for k, v in prefixed['groups']: - # if 'bco' in prefixed['groups'][k]: - # if flatten: - # for perm in v['bco']: - # if perm not in flat_perms: - # flat_perms.append(perm) - # else: - # bco_specific['groups'][k] = { - # 'bco': v['bco'] - # } - # else: - # bco_specific['groups'][k] = { } - - # # Get the permissions. - # # Source: https://stackoverflow.com/a/952952 - - # # Flatten the permissions so that we can - # # work with them more easily. - - # # Return based on what we need. - # if flatten == True: - - # # Only unique permissions are returned. - # return flat_perms - - # elif flatten == False: - - # return bco_specific - - def user_from_request(self, request): - """Returns a user object from a request. - - Parameters - ---------- - request: rest_framework.request.Request - Django request object. 
- - Returns - ------- - django.contrib.auth.models.User - """ - - user_id = Token.objects.get( - key=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - ).user_id - return User.objects.get(id=user_id) diff --git a/api/serializers.py b/api/serializers.py deleted file mode 100755 index f7b337cb..00000000 --- a/api/serializers.py +++ /dev/null @@ -1,27 +0,0 @@ -from rest_framework import serializers - - -# ----- Request Serializers ----- # - - -# Serializers must be abstracted in order to use abstracted models. -# Source (last solution): https://stackoverflow.com/questions/33137165/django-rest-framework-abstract-class-serializer - -# Base serializers to be inherited by each model. - -# Abstract so that any model can be used. - -# Source (4th response): https://stackoverflow.com/questions/30831731/create-a-generic-serializer-with-a-dynamic-model-in-meta - - -def getGenericSerializer(incoming_model, incoming_fields): - class GenericObjectSerializer(serializers.ModelSerializer): - - # Arguments - # incoming_table: the table to write to. - - class Meta: - model = incoming_model - fields = incoming_fields - - return GenericObjectSerializer diff --git a/api/signals.py b/api/signals.py deleted file mode 100644 index 486aaf9d..00000000 --- a/api/signals.py +++ /dev/null @@ -1,116 +0,0 @@ -# Source: https://stackoverflow.com/a/42744626/5029459 - - -def populate_models(sender, **kwargs): - """Initial DB setup""" - - from api.models import BCO - from api.model.groups import GroupInfo - from api.scripts.utilities import DbUtils - - # The BCO groups need to be created FIRST because - # models.py listens for user creation and automatically - # adds any new user to bco_drafter and bco_publishers. - from django.contrib.auth.models import Group, Permission, User - - # # Set permissions for all of the groups. 
- # # Source: https://stackoverflow.com/a/18797715/5029459 - # from django.contrib.auth.models import Permission - # from django.contrib.contenttypes.models import ContentType - - # Custom publishing permissions which use the model name. - # Source: https://stackoverflow.com/a/9940053/5029459 - - # Create a bco drafter and publisher if they don't exist. - - # The groups are automatically created for these two users - # in models.py - - # NO password is set here... - if User.objects.filter(username="bco_drafter").count() == 0: - User.objects.create_user(username="bco_drafter") - - if User.objects.filter(username="bco_publisher").count() == 0: - User.objects.create_user(username="bco_publisher") - - # BCO is the anon (public) prefix. - - # Note that user creation is listened for in - # models.py by associate_user_group. - - # Create the anonymous user if they don't exist. - if User.objects.filter(username="anon").count() == 0: - User.objects.create_user(username="anon") - - # Create an administrator if they don't exist. - if User.objects.filter(username="wheel").count() == 0: - User.objects.create_superuser(username="wheel", password="wheel") - - # Make bco_publisher the group owner of the prefix 'BCO'. - if BCO.objects.filter(prefix="BCO").count() == 0: - # Django wants a primary key for the Group... - group = Group.objects.get(name="bco_publisher").name - - # Django wants a primary key for the User... - user = User.objects.get(username="bco_publisher").username - - DbUtils.DbUtils().write_object( - p_app_label="api", - p_model_name="Prefix", - p_fields=["created_by", "owner_group", "owner_user", "prefix"], - p_data={ - "created_by": user, - "owner_group": group, - "owner_user": user, - "prefix": "BCO", - }, - ) - - # Create the default (non-anon, non-wheel) groups if they don't exist. 
- # Group administrators - if Group.objects.filter(name="group_admins").count() == 0: - Group.objects.create(name="group_admins") - GroupInfo.objects.create( - delete_members_on_group_deletion=False, - description="Group administrators", - group=Group.objects.get(name="group_admins"), - max_n_members=-1, - owner_user=User.objects.get(username="wheel"), - ) - # Create the permissions for group administrators. - for perm in ["add", "change", "delete", "view"]: - - # Permissions already come with the system, - # so just associated them. - - # Give the group administrators the permissions. - Group.objects.get(name="group_admins").permissions.add( - Permission.objects.get(codename=perm + "_group") - ) - - # Prefix administrators - if Group.objects.filter(name="prefix_admins").count() == 0: - Group.objects.create(name="prefix_admins") - GroupInfo.objects.create( - delete_members_on_group_deletion=False, - description="Prefix administrators", - group=Group.objects.get(name="prefix_admins"), - max_n_members=-1, - owner_user=User.objects.get(username="wheel"), - ) - - # Create the permissions for prefix administrators. - for perm in ["add", "change", "delete", "view"]: - - # Permissions already come with the system, - # so just associated them. - - # Give the group administrators the permissions. - Group.objects.get(name="prefix_admins").permissions.add( - Permission.objects.get(codename=perm + "_prefix") - ) - - # Associate wheel with all groups. - group = Group.objects.all() - for g in group: - User.objects.get(username="wheel").groups.add(g) diff --git a/api/templates/api/account_activation_message.html b/api/templates/api/account_activation_message.html deleted file mode 100644 index b6a8e816..00000000 --- a/api/templates/api/account_activation_message.html +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - Portal Account Activation - - -
- {% if activation_success == True %} -

Successful activation! You may close this window or open Portal in a new tab.

- - {% else %} -

Unsuccessful activation! The account may not have been requested or may have already been activated. Please request another activation e-mail on the Portal.

- {% endif %} - -
- - \ No newline at end of file diff --git a/api/urls.py b/api/urls.py deleted file mode 100755 index 65f5bd65..00000000 --- a/api/urls.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python3 -"""BCODB URLs - -URL access points for API -""" - -# For importing configuration files -import configparser -from django.conf import settings - -# For favicon and any other static files -from django.urls import path, re_path -from django.contrib.staticfiles.storage import staticfiles_storage -from django.views.generic.base import RedirectView - -from rest_framework import permissions -from drf_yasg.views import get_schema_view -from drf_yasg import openapi - -from api.views import ( - ApiAccountsActivateUsernameTempIdentifier, - ApiAccountsDescribe, - ApiAccountsNew, - ApiGroupsCreate, - ApiGroupsInfo, - ApiGroupsDelete, - ApiGroupsModify, - ApiObjectsDraftsCreate, - ApiObjectsDraftsModify, - ApiObjectsDraftsPermissions, - ApiObjectsDraftsPermissionsSet, - ApiObjectsDraftsPublish, - ApiObjectsDraftsRead, - ApiObjectsPublished, - ApiObjectsSearch, - ApiObjectsToken, - ApiPrefixesCreate, - ApiPrefixesDelete, - ApiPrefixesPermissionsSet, - ApiPrefixesToken, - ApiPrefixesTokenFlat, - ApiPrefixesModify, - ApiObjectsPublish, - ApiObjectsDraftsToken, - ApiPublicDescribe, - DraftObjectId, - ObjectIdRootObjectId, - ObjectIdRootObjectIdVersion, - ValidateBCO, -) - -# Load the server config file. 
-server_config = configparser.ConfigParser() -server_config.read(settings.BASE_DIR + "/server.conf") - -PUBLISH_ONLY = server_config["PUBLISHONLY"]["publishonly"] -VERSION = server_config["VERSION"]["version"] - -ShcemaView = get_schema_view( - openapi.Info( - title="BioCompute Object Data Base API (BCODB API)", - default_version=VERSION, - description="A web application that can be used to create, store and " - "edit BioCompute objects based on BioCompute schema described " - "in the BCO specification document.", - terms_of_service="https://github.com/biocompute-objects/bco_api/blob/master/LICENSE", - contact=openapi.Contact(email="object.biocompute@gmail.com"), - license=openapi.License(name="MIT License"), - ), - public=True, - permission_classes=(permissions.AllowAny,), -) - -urlpatterns = [] - -# Do we have a publish-only server? -if PUBLISH_ONLY == "True": - urlpatterns = [ - re_path( - r"^api/doc(?P\.json|\.yaml)$", - ShcemaView.without_ui(cache_timeout=0), - name="schema-json", - ), - path( - "api/docs/", - ShcemaView.with_ui("swagger", cache_timeout=0), - name="schema-swagger-ui", - ), - path( - "api/redocs/", - ShcemaView.with_ui("redoc", cache_timeout=0), - name="schema-redoc", - ), - path("", ObjectIdRootObjectId.as_view()), - path( - "/", - ObjectIdRootObjectIdVersion.as_view(), - ), - path("api/objects/publish/", ApiObjectsPublish.as_view()), - path("api/objects/published/", ApiObjectsPublished.as_view()), - path("api/public/describe/", ApiPublicDescribe.as_view()), - ] - -elif PUBLISH_ONLY == "False": - urlpatterns = [ - re_path( - r"^api/docs(?P\.json|\.yaml)$", - ShcemaView.without_ui(cache_timeout=0), - name="schema-json", - ), - path( - "favicon.ico", - RedirectView.as_view(url=staticfiles_storage.url("img/favicon.ico")), - ), - path( - "api/docs/", - ShcemaView.with_ui("swagger", cache_timeout=0), - name="schema-swagger-ui", - ), - path( - "api/redocs/", - ShcemaView.with_ui("redoc", cache_timeout=0), - name="schema-redoc", - ), - path("/DRAFT", 
DraftObjectId.as_view()), - path( - "/", - ObjectIdRootObjectIdVersion.as_view(), - ), - path("", ObjectIdRootObjectId.as_view()), - path( - "api/accounts/activate//", - ApiAccountsActivateUsernameTempIdentifier.as_view(), - ), - path("api/accounts/describe/", ApiAccountsDescribe.as_view()), - path("api/accounts/new/", ApiAccountsNew.as_view()), - path("api/groups/group_info/", ApiGroupsInfo.as_view()), - path("api/groups/create/", ApiGroupsCreate.as_view()), - path("api/groups/delete/", ApiGroupsDelete.as_view()), - path("api/groups/modify/", ApiGroupsModify.as_view()), - path("api/objects/drafts/create/", ApiObjectsDraftsCreate.as_view()), - path("api/objects/drafts/modify/", ApiObjectsDraftsModify.as_view()), - path("api/objects/drafts/permissions/", ApiObjectsDraftsPermissions.as_view()), - path( - "api/objects/drafts/permissions/set/", - ApiObjectsDraftsPermissionsSet.as_view(), - ), - path("api/objects/drafts/publish/", ApiObjectsDraftsPublish.as_view()), - path("api/objects/drafts/read/", ApiObjectsDraftsRead.as_view()), - path("api/objects/drafts/token/", ApiObjectsDraftsToken.as_view()), - path("api/objects/publish/", ApiObjectsPublish.as_view()), - path("api/objects/search/", ApiObjectsSearch.as_view()), - path("api/objects/validate/", ValidateBCO.as_view()), - path("api/objects/token/", ApiObjectsToken.as_view()), - path("api/objects/published/", ApiObjectsPublished.as_view()), - path("api/prefixes/create/", ApiPrefixesCreate.as_view()), - path("api/prefixes/delete/", ApiPrefixesDelete.as_view()), - path("api/prefixes/permissions/set/", ApiPrefixesPermissionsSet.as_view()), - path("api/prefixes/token/", ApiPrefixesToken.as_view()), - path("api/prefixes/token/flat/", ApiPrefixesTokenFlat.as_view()), - path("api/prefixes/modify/", ApiPrefixesModify.as_view()), - path("api/public/describe/", ApiPublicDescribe.as_view()), - ] diff --git a/api/validation_definitions/IEEE_sub/IEEE2791-2020.schema b/api/validation_definitions/IEEE_sub/IEEE2791-2020.schema 
deleted file mode 100755 index 2506be6b..00000000 --- a/api/validation_definitions/IEEE_sub/IEEE2791-2020.schema +++ /dev/null @@ -1,178 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "type": "object", - "title": "Base type for all IEEE-2791 Objects", - "description": "All IEEE-2791 object types must adhear to this type in order to be compliant with IEEE-2791 standard", - "required": [ - "object_id", - "spec_version", - "etag", - "provenance_domain", - "usability_domain", - "description_domain", - "execution_domain", - "io_domain" - ], - "definitions": { - "object_id": { - "type": "string", - "description": "A unique identifier that should be applied to each IEEE-2791 Object instance, generated and assigned by a IEEE-2791 database engine. IDs should never be reused" - }, - "uri": { - "type": "object", - "description": "Any of the four Resource Identifers defined at https://tools.ietf.org/html/draft-handrews-json-schema-validation-01#section-7.3.5", - "additionalProperties": false, - "required": [ - "uri" - ], - "properties": { - "filename": { - "type": "string" - }, - "uri": { - "type": "string", - "format": "uri" - }, - "access_time": { - "type": "string", - "description": "Time stamp of when the request for this data was submitted", - "format": "date-time" - }, - "sha1_checksum": { - "type": "string", - "description": "output of hash function that produces a message digest", - "pattern": "[A-Za-z0-9]+" - } - } - }, - "contributor": { - "type": "object", - "description": "Contributor identifier and type of contribution (determined according to PAV ontology) is required", - "required": [ - "contribution", - "name" - ], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Name of contributor", - "examples": [ - "Charles Darwin" - ] - }, - "affiliation": { - "type": "string", - "description": "Organization the particular contributor 
is affiliated with", - "examples": [ - "HMS Beagle" - ] - }, - "email": { - "type": "string", - "description": "electronic means for identification and communication purposes", - "examples": [ - "name@example.edu" - ], - "format": "email" - }, - "contribution": { - "type": "array", - "description": "type of contribution determined according to PAV ontology", - "reference": "https://doi.org/10.1186/2041-1480-4-37", - "items": { - "type": "string", - "enum": [ - "authoredBy", - "contributedBy", - "createdAt", - "createdBy", - "createdWith", - "curatedBy", - "derivedFrom", - "importedBy", - "importedFrom", - "providedBy", - "retrievedBy", - "retrievedFrom", - "sourceAccessedBy" - ] - } - }, - "orcid": { - "type": "string", - "description": "Field to record author information. ORCID identifiers allow for the author to curate their information after submission. ORCID identifiers must be valid and must have the prefix ‘https://orcid.org/’", - "examples": [ - "http://orcid.org/0000-0002-1825-0097" - ], - "format": "uri" - } - } - } - }, - "additionalProperties": false, - "properties": { - "object_id": { - "$ref": "#/definitions/object_id", - "readOnly": true - }, - "spec_version": { - "type": "string", - "description": "Version of the IEEE-2791 specification used to define this document", - "examples": [ - "https://w3id.org/ieee/ieee-2791-schema/" - ], - "readOnly": true, - "format": "uri" - }, - "etag": { - "type": "string", - "description": "See https://tools.ietf.org/html/rfc7232#section-2.1 for full description. 
It is recommended that the ETag be deleted or updated if the object file is changed (except in cases using weak ETags in which the entirety of the change comprises a simple re-writing of the JSON).", - "examples": [ - "5986B05969341343E77A95B4023600FC8FEF48B7E79F355E58B0B404A4F50995" - ], - "readOnly": true, - "pattern": "^([A-Za-z0-9]+)$" - }, - "provenance_domain": { - "$ref": "domains/provenance_domain.json" - }, - "usability_domain": { - "$ref": "domains/usability_domain.json" - }, - "extension_domain": { - "type": "array", - "description": "An optional domain that contains user-defined fields.", - "items":{ - "required":[ - "extension_schema" - ], - "additionalProperties": true, - "properties": { - "extension_schema":{ - "title": "Extension Schema", - "description": "resolving this URI should provide this extension's JSON Schema", - "type": "string", - "format": "uri" - } - } - } - }, - "description_domain": { - "$ref": "domains/description_domain.json" - }, - "execution_domain": { - "$ref": "domains/execution_domain.json" - }, - "parametric_domain": { - "$ref": "domains/parametric_domain.json" - }, - "io_domain": { - "$ref": "domains/io_domain.json" - }, - "error_domain": { - "$ref": "domains/error_domain.json" - } - } -} \ No newline at end of file diff --git a/api/validation_definitions/IEEE_sub/domains/description_domain.json b/api/validation_definitions/IEEE_sub/domains/description_domain.json deleted file mode 100755 index a9eac520..00000000 --- a/api/validation_definitions/IEEE_sub/domains/description_domain.json +++ /dev/null @@ -1,165 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/description_domain.json", - "type": "object", - "title": "Description Domain", - "description": "Structured field for description of external references, the pipeline steps, and the relationship of I/O objects.", - "required": [ - "keywords", - "pipeline_steps" - ], - "properties": { - "keywords": { - 
"type": "array", - "description": "Keywords to aid in search-ability and description of the object.", - "items": { - "type": "string", - "description": "This field should take free text value using common biological research terminology.", - "examples": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ] - } - }, - "xref": { - "type": "array", - "description": "List of the databases or ontology IDs that are cross-referenced in the IEEE-2791 Object.", - "items": { - "type": "object", - "description": "External references are stored in the form of prefixed identifiers (CURIEs). These CURIEs map directly to the URIs maintained by Identifiers.org.", - "reference": "https://identifiers.org/", - "required": [ - "namespace", - "name", - "ids", - "access_time" - ], - "properties": { - "namespace": { - "type": "string", - "description": "External resource vendor prefix", - "examples": [ - "pubchem.compound" - ] - }, - "name": { - "type": "string", - "description": "Name of external reference", - "examples": [ - "PubChem-compound" - ] - }, - "ids": { - "type": "array", - "description": "List of reference identifiers", - "items": { - "type": "string", - "description": "Reference identifier", - "examples": [ - "67505836" - ] - } - }, - "access_time": { - "type": "string", - "description": "Date and time the external reference was accessed", - "format": "date-time" - } - } - } - }, - "platform": { - "type": "array", - "description": "reference to a particular deployment of an existing platform where this IEEE-2791 Object can be reproduced.", - "items": { - "type": "string", - "examples": [ - "hive" - ] - } - }, - "pipeline_steps": { - "type": "array", - "description": "Each individual tool (or a well defined and reusable script) is represented as a step. 
Parallel processes are given the same step number.", - "items": { - "additionalProperties": false, - "type": "object", - "required": [ - "step_number", - "name", - "description", - "input_list", - "output_list" - ], - "properties": { - "step_number": { - "type": "integer", - "description": "Non-negative integer value representing the position of the tool in a one-dimensional representation of the pipeline." - }, - "name": { - "type": "string", - "description": "This is a recognized name of the software tool", - "examples": [ - "HIVE-hexagon" - ] - }, - "description": { - "type": "string", - "description": "Specific purpose of the tool.", - "examples": [ - "Alignment of reads to a set of references" - ] - }, - "version": { - "type": "string", - "description": "Version assigned to the instance of the tool used corresponding to the upstream release.", - "examples": [ - "1.3" - ] - }, - "prerequisite": { - "type": "array", - "description": "Reference or required prereqs", - "items": { - "type": "object", - "description": "Text value to indicate a package or prerequisite for running the tool used.", - "required": [ - "name", - "uri" - ], - "properties": { - "name": { - "type": "string", - "description": "Public searchable name for reference or prereq.", - "examples": [ - "Hepatitis C virus genotype 1" - ] - }, - "uri": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - }, - "input_list": { - "type": "array", - "description": "URIs (expressed as a URN or URL) of the input files for each tool.", - "items": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - }, - "output_list": { - "type": "array", - "description": "URIs (expressed as a URN or URL) of the output files for each tool.", - "items": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - } - } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/error_domain.json b/api/validation_definitions/IEEE_sub/domains/error_domain.json deleted file 
mode 100755 index c0be62b0..00000000 --- a/api/validation_definitions/IEEE_sub/domains/error_domain.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/error_domain.json", - "type": "object", - "title": "Error Domain", - "description": "Fields in the Error Domain are open-ended and not restricted nor defined by the IEEE-2791 standard. It is RECOMMENDED that the keys directly under empirical_error and algorithmic_error use a full URI. Resolving the URI SHOULD give a JSON Schema or textual definition of the field. Other keys are not allowed error_domain", - "additionalProperties": false, - "required": [ - "empirical_error", - "algorithmic_error" - ], - "properties": { - "empirical_error": { - "type": "object", - "title": "Empirical Error", - "description": "empirically determined values such as limits of detectability, false positives, false negatives, statistical confidence of outcomes, etc. This can be measured by running the algorithm on multiple data samples of the usability domain or through the use of carefully designed in-silico data." - }, - "algorithmic_error": { - "type": "object", - "title": "Algorithmic Error", - "description": "descriptive of errors that originate by fuzziness of the algorithms, driven by stochastic processes, in dynamically parallelized multi-threaded executions, or in machine learning methodologies where the state of the machine can affect the outcome." 
- } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/execution_domain.json b/api/validation_definitions/IEEE_sub/domains/execution_domain.json deleted file mode 100755 index 26a7930b..00000000 --- a/api/validation_definitions/IEEE_sub/domains/execution_domain.json +++ /dev/null @@ -1,111 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/execution_domain.json", - "type": "object", - "title": "Execution Domain", - "description": "The fields required for execution of the IEEE-2791 Object are herein encapsulated together in order to clearly separate information needed for deployment, software configuration, and running applications in a dependent environment", - "required": [ - "script", - "script_driver", - "software_prerequisites", - "external_data_endpoints", - "environment_variables" - ], - "additionalProperties": false, - "properties": { - "script": { - "type": "array", - "description": "points to a script object or objects that was used to perform computations for this IEEE-2791 Object instance.", - "items": { - "additionalProperties": false, - "properties": { - "uri": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - }, - "script_driver": { - "type": "string", - "description": "Indication of the kind of executable that can be launched in order to perform a sequence of commands described in the script in order to run the pipelin", - "examples": [ - "hive", - "cwl-runner", - "shell" - ] - }, - "software_prerequisites": { - "type": "array", - "description": "Minimal necessary prerequisites, library, tool versions needed to successfully run the script to produce this IEEE-2791 Object.", - "items": { - "type": "object", - "description": "A necessary prerequisite, library, or tool version.", - "required": [ - "name", - "version", - "uri" - ], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Names of software 
prerequisites", - "examples": [ - "HIVE-hexagon" - ] - }, - "version": { - "type": "string", - "description": "Versions of the software prerequisites", - "examples": [ - "babajanian.1" - ] - }, - "uri": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - }, - "external_data_endpoints": { - "type": "array", - "description": "Minimal necessary domain-specific external data source access in order to successfully run the script to produce this IEEE-2791 Object.", - "items": { - "type": "object", - "description": "Requirement for network protocol endpoints used by a pipeline’s scripts, or other software.", - "required": [ - "name", - "url" - ], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Description of the service that is accessed", - "examples": [ - "HIVE", - "access to e-utils" - ] - }, - "url": { - "type": "string", - "description": "The endpoint to be accessed.", - "examples": [ - "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - ] - } - } - } - }, - "environment_variables": { - "type": "object", - "description": "Environmental parameters that are useful to configure the execution environment on the target platform.", - "additionalProperties": false, - "patternProperties": { - "^[a-zA-Z_]+[a-zA-Z0-9_]*$": { - "type": "string" - } - } - } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/io_domain.json b/api/validation_definitions/IEEE_sub/domains/io_domain.json deleted file mode 100755 index 1f163e75..00000000 --- a/api/validation_definitions/IEEE_sub/domains/io_domain.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/io_domain.json", - "type": "object", - "title": "Input and Output Domain", - "description": "The list of global input and output files created by the computational workflow, excluding the intermediate files. 
Custom to every specific IEEE-2791 Object implementation, these fields are pointers to objects that can reside in the system performing the computation or any other accessible system.", - "required": [ - "input_subdomain", - "output_subdomain" - ], - "properties": { - "input_subdomain": { - "type": "array", - "title": "input_domain", - "description": "A record of the references and input files for the entire pipeline. Each type of input file is listed under a key for that type.", - "items": { - "additionalProperties": false, - "type": "object", - "required": [ - "uri" - ], - "properties": { - "uri": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - }, - "output_subdomain": { - "type": "array", - "title": "output_subdomain", - "description": "A record of the outputs for the entire pipeline.", - "items": { - "type": "object", - "title": "The Items Schema", - "required": [ - "mediatype", - "uri" - ], - "properties": { - "mediatype": { - "type": "string", - "title": "mediatype", - "description": "https://www.iana.org/assignments/media-types/", - "default": "application/octet-stream", - "examples": [ - "text/csv" - ], - "pattern": "^(.*)$" - }, - "uri": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/uri" - } - } - } - } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/parametric_domain.json b/api/validation_definitions/IEEE_sub/domains/parametric_domain.json deleted file mode 100755 index cde0644b..00000000 --- a/api/validation_definitions/IEEE_sub/domains/parametric_domain.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/parametric_domain.json", - "type": "array", - "title": "Parametric Domain", - "description": "This represents the list of NON-default parameters customizing the computational flow which can affect the output of the calculations. 
These fields can be custom to each kind of analysis and are tied to a particular pipeline implementation", - "items":{ - "required": [ - "param", - "value", - "step" - ], - "additionalProperties": false, - "properties": { - "param": { - "type": "string", - "title": "param", - "description": "Specific variables for the computational workflow", - "examples": [ - "seed" - ] - }, - "value": { - "type": "string", - "description": "Specific (non-default) parameter values for the computational workflow", - "title": "value", - "examples": [ - "14" - ] - }, - "step": { - "type": "string", - "title": "step", - "description": "Refers to the specific step of the workflow relevant to the parameters specified in 'param' and 'value'", - "examples": [ - "1" - ], - "pattern": "^(.*)$" - } - } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/provenance_domain.json b/api/validation_definitions/IEEE_sub/domains/provenance_domain.json deleted file mode 100755 index c2406158..00000000 --- a/api/validation_definitions/IEEE_sub/domains/provenance_domain.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/provenance_domain.json", - "type": "object", - "title": "Provenance Domain", - "description": "Structured field for tracking data through transformations, including contributors, reviewers, and versioning.", - "required": [ - "name", - "version", - "created", - "modified", - "contributors", - "license" - ], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Public searchable name for IEEE-2791 Object. 
This public field should take free text value using common biological research terminology supporting the terminology used in the usability_domain, external references (xref), and keywords sections.", - "examples": [ - "HCV1a ledipasvir resistance SNP detection" - ] - }, - "version": { - "type": "string", - "description": "Records the versioning of this IEEE-2791 Object instance. IEEE-2791 Object Version should adhere to semantic versioning as recommended by Semantic Versioning 2.0.0.", - "reference": "https://semver.org/spec/v2.0.0.html", - "examples": [ - "2.9" - ] - }, - "review": { - "type": "array", - "description": "Description of the current verification status of an object in the review process. The unreviewed flag indicates that the object has been submitted, but no further evaluation or verification has occurred. The in-review flag indicates that verification is underway. The approved flag indicates that the IEEE-2791 Object has been verified and reviewed. The suspended flag indicates an object that was once valid is no longer considered valid. The rejected flag indicates that an error or inconsistency was detected in the IEEE-2791 Object, and it has been removed or rejected. The fields from the contributor object (described in section 2.1.10) is inherited to populate the reviewer section.", - "items": { - "type": "object", - "required": [ - "status", - "reviewer" - ], - "additionalProperties": false, - "properties": { - "date": { - "type": "string", - "format": "date-time" - }, - "reviewer": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/contributor", - "description": "Contributer that assigns IEEE-2791 review status." - }, - "reviewer_comment": { - "type": "string", - "description": "Optional free text comment by reviewer", - "examples": [ - "Approved by research institution staff. 
Waiting for approval from regulator" - ] - }, - "status": { - "type": "string", - "enum": [ - "unreviewed", - "in-review", - "approved", - "rejected", - "suspended" - ], - "description": "Current verification status of the IEEE-2791 Object", - "default": "unreviewed" - } - } - } - }, - "derived_from": { - "description": "value of `ieee2791_id` field of another IEEE-2791 that this object is partially or fully derived from", - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/object_id" - }, - "obsolete_after": { - "type": "string", - "description": "If the object has an expiration date, this optional field will specify that using the ‘datetime’ type described in ISO-8601 format, as clarified by W3C https://www.w3.org/TR/NOTE-datetime.", - "format": "date-time" - }, - "embargo": { - "type": "object", - "description": "If the object has a period of time during which it shall not be made public, that range can be specified using these optional fields. Using the datetime type, a start and end time are specified for the embargo.", - "additionalProperties": false, - "properties": { - "start_time": { - "type": "string", - "description": "Beginning date of embargo period.", - "format": "date-time" - }, - "end_time": { - "type": "string", - "description": "End date of embargo period.", - "format": "date-time" - } - } - }, - "created": { - "type": "string", - "description": "Date and time of the IEEE-2791 Object creation", - "readOnly": true, - "format": "date-time" - }, - "modified": { - "type": "string", - "description": "Date and time the IEEE-2791 Object was last modified", - "readOnly": true, - "format": "date-time" - }, - "contributors": { - "type": "array", - "description": "This is a list to hold contributor identifiers and a description of their type of contribution, including a field for ORCIDs to record author information, as they allow for the author to curate their information after submission. 
The contribution type is a choice taken from PAV ontology: provenance, authoring and versioning, which also maps to the PROV-O.", - "items": { - "$ref": "IEEE_sub/IEEE2791-2020.schema#/definitions/contributor" - } - }, - "license": { - "type": "string", - "description": "Creative Commons license or other license information (text) space. The default or recommended license can be Attribution 4.0 International as shown in example", - "examples": [ - "https://spdx.org/licenses/CC-BY-4.0.html" - ] - } - } -} diff --git a/api/validation_definitions/IEEE_sub/domains/usability_domain.json b/api/validation_definitions/IEEE_sub/domains/usability_domain.json deleted file mode 100755 index 54e936e4..00000000 --- a/api/validation_definitions/IEEE_sub/domains/usability_domain.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://w3id.org/ieee/ieee-2791-schema/usability_domain.json", - "type": "array", - "title": "Usability Domain", - "description": "Author-defined usability domain of the IEEE-2791 Object. 
This field is to aid in search-ability and provide a specific description of the function of the object.", - "items": { - "type": "string", - "description": "Free text values that can be used to provide scientific reasoning and purpose for the experiment", - "examples": [ - "Identify baseline single nucleotide polymorphisms SNPs [SO:0000694], insertions [so:SO:0000667], and deletions [so:SO:0000045] that correlate with reduced ledipasvir [pubchem.compound:67505836] antiviral drug efficacy in Hepatitis C virus subtype 1 [taxonomy:31646]", - "Identify treatment emergent amino acid substitutions [so:SO:0000048] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid substitutions [so:SO:0000048] identified correlate with treatment failure involving other drugs against the same virus" - ] - } -} diff --git a/api/views.py b/api/views.py deleted file mode 100755 index 48424375..00000000 --- a/api/views.py +++ /dev/null @@ -1,1824 +0,0 @@ -#!/usr/bin/env python3 -"""BCODB views - -Django views for BCODB API -""" - -import jwt -from django.contrib.auth.models import User -from drf_yasg import openapi -from drf_yasg.utils import swagger_auto_schema -from rest_framework import status -from rest_framework.permissions import IsAuthenticated -from rest_framework.renderers import TemplateHTMLRenderer -from rest_framework.response import Response -from rest_framework.views import APIView -from rest_framework.authtoken.models import Token -from api.permissions import RequestorInPrefixAdminsGroup -from api.scripts.method_specific.GET_activate_account import GET_activate_account -from api.scripts.method_specific.GET_draft_object_by_id import get_draft_object_by_id -from api.scripts.method_specific.GET_published_object_by_id import ( - GET_published_object_by_id, -) -from api.scripts.method_specific.GET_published_object_by_id_with_version import ( - GET_published_object_by_id_with_version, -) -from 
api.scripts.method_specific.POST_validate_payload_against_schema import ( - post_validate_bco, -) - -# Request-specific methods -from api.model.groups import ( - post_api_groups_modify, - post_api_groups_delete, - post_api_groups_info, - post_api_groups_create, -) -from api.model.prefix import ( - post_api_prefixes_create, - post_api_prefixes_delete, - post_api_prefixes_modify, - post_api_prefixes_permissions_set, - post_api_prefixes_token_flat, -) - -from api.scripts.method_specific.POST_api_accounts_describe import ( - POST_api_accounts_describe, -) -from api.scripts.method_specific.POST_api_accounts_new import POST_api_accounts_new -from api.scripts.method_specific.POST_api_objects_drafts_create import ( - post_api_objects_drafts_create, -) -from api.scripts.method_specific.POST_api_objects_drafts_modify import ( - post_api_objects_drafts_modify, -) -from api.scripts.method_specific.POST_api_objects_drafts_permissions import ( - POST_api_objects_drafts_permissions, -) -from api.scripts.method_specific.POST_api_objects_drafts_permissions_set import ( - POST_api_objects_drafts_permissions_set, -) -from api.scripts.method_specific.POST_api_objects_drafts_publish import ( - post_api_objects_drafts_publish, -) -from api.scripts.method_specific.POST_api_objects_drafts_read import ( - POST_api_objects_drafts_read, -) -from api.scripts.method_specific.POST_api_objects_drafts_token import ( - POST_api_objects_drafts_token, -) -from api.scripts.method_specific.POST_api_objects_publish import ( - post_api_objects_publish, -) -from api.scripts.method_specific.POST_api_objects_published import ( - POST_api_objects_published, -) -from api.scripts.method_specific.POST_api_objects_search import post_api_objects_search -from api.scripts.method_specific.POST_api_objects_token import POST_api_objects_token - -# For helper functions -from api.scripts.utilities import UserUtils - - -################################################################################################ -# 
NOTES -################################################################################################ -# Permissions -# We can't use the examples given in -# https://www.django-rest-framework.org/api-guide/permissions/#djangomodelpermissions -# because our permissions system is not tied to -# the request type (DELETE, GET, PATCH, POST). -################################################################################################ - - -# TODO: This is a helper function so might want to go somewhere else -def check_post_and_process(request, PostFunction) -> Response: - """ - Helper function to perform the verification that a request is a POST and to then - make a call to the callback function with the request body. - - Returns: An HTTP Response Object - """ - # checked is suppressed for the milestone. - - # Check the request - # checked = RequestUtils.RequestUtils().check_request_templates( - # method = 'POST', - # request = request.data - # ) - - checked = None - if checked is None: - # Pass the request to the handling function. - return PostFunction(request) - else: - return Response(data=checked, status=status.HTTP_400_BAD_REQUEST) - - -# TODO: This is currently commented out; need to see what checking is meant to do -def check_get(request) -> Response: - """ - Helper function to perform the verification that a request is a GET - - Returns: An HTTP Response Object - """ - # Check the request - # checked = RequestUtils.RequestUtils().check_request_templates( - # method = 'GET', - # request = request.data - # ) - - # Placeholder - return Response(status=status.HTTP_200_OK) - - -class ApiAccountsActivateUsernameTempIdentifier(APIView): - """ - Activate an account - - -------------------- - - This endpoint is a GET request to activate a new account. - To activate an account during registration we receive an email or a - temporary identifier to authenticate and activate account. 
This endpoint - will check the validity of the provided temporary identifier for a specific - user account. This is open to anyone to activate a new account, as long as - they have a valid token generated by this host. This allows other users - to act as the verification layer in addition to the system. - - """ - - authentication_classes = [] - permission_classes = [] - - # For the success and error messages - renderer_classes = [TemplateHTMLRenderer] - template_name = "api/account_activation_message.html" - - auth = [] - auth.append( - openapi.Parameter( - "username", - openapi.IN_PATH, - description="Username to be authenticated.", - type=openapi.TYPE_STRING, - ) - ) - auth.append( - openapi.Parameter( - "temp_identifier", - openapi.IN_PATH, - description="The temporary identifier needed to authenticate the activation. This " - "is found in the temporary account table (i.e. where an account is " - "staged).", - type=openapi.TYPE_STRING, - ) - ) - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "Account has been activated.", - 403: "Requestor's credentials were rejected.", - }, - tags=["Account Management"], - ) - def get(self, request, username: str, temp_identifier: str): - check_get(request) - checked = None - if checked is None: - return GET_activate_account( - username=username, temp_identifier=temp_identifier - ) - else: - return Response( - {"activation_success": False, "status": status.HTTP_400_BAD_REQUEST} - ) - - -# Source: https://www.django-rest-framework.org/api-guide/authentication/#by-exposing-an-api-endpoint -class ApiAccountsDescribe(APIView): - """ - Account details - - -------------------- - No schema for this request since only the Authorization header is required. - The word 'Token' must be included in the header. 
- For example: 'Token 627626823549f787c3ec763ff687169206626149' - """ - - auth = [ - openapi.Parameter( - "Authorization", - openapi.IN_HEADER, - description="Authorization Token", - type=openapi.TYPE_STRING, - ) - ] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "Authorization is successful.", - 403: "Forbidden. Authentication credentials were not provided.", - 403: "Invalid token" - }, - tags=["Account Management"], - ) - def post(self, request): - """ - Pass the request to the handling function - Source: https://stackoverflow.com/a/31813810 - """ - - if request.headers["Authorization"].split(" ")[0] == "Token" or request.headers["Authorization"].split(" ")[0] == "TOKEN": - return POST_api_accounts_describe( - token=request.META.get("HTTP_AUTHORIZATION") - ) - if request.headers["Authorization"].split(" ")[0] == "Bearer": - jw_token=request.META.get("HTTP_AUTHORIZATION").split(" ")[1] - unverified_payload = jwt.decode(jw_token, None, False) - user = User.objects.get(email=unverified_payload['email']) - token = "Thing "+ str(Token.objects.get(user=user)) - return POST_api_accounts_describe(token) - else: - return Response(status=status.HTTP_400_BAD_REQUEST) - - -class ApiGroupsInfo(APIView): - """Group Info - - -------------------- - - This API call checks a user's groups and permissions in ths system. The User token is - required. 
- - ```JSON - { - "POST_api_groups_info": { - "names": [ - "bco_drafter", "bco_publisher" - ] - } - } - ``` - """ - - POST_api_groups_info_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["names"], - properties={ - "names": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="List of groups to delete.", - items=openapi.Schema(type=openapi.TYPE_STRING), - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Group Information Schema", - description="API call checks a user's groups and permissions" - " in this system.", - required=["POST_api_groups_info"], - properties={"POST_api_groups_info": POST_api_groups_info_schema}, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Success. Group permissions returned", - 400: "Bad request. Request is not formatted correctly.", - 403: "Forbidden. Invalid token or authentication credentials were not provided.", - }, - tags=["Group Management"], - ) - def post(self, request): - return check_post_and_process(request, post_api_groups_info) - - -class ApiGroupsCreate(APIView): - """Create group - - -------------------- - This API call creates a BCO group in ths system. The name of the group is - required but all other parameters are optional. - """ - - POST_api_groups_create_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["name"], - properties={ - "name": openapi.Schema( - type=openapi.TYPE_STRING, description="The name of the group to create" - ), - "usernames": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema(type=openapi.TYPE_STRING), - description="List of users to add to the group.", - ), - "delete_members_on_group_deletion": openapi.Schema( - type=openapi.TYPE_BOOLEAN, - description="Delete the members of the group if the group is deleted.", - ), - "description": openapi.Schema( - type=openapi.TYPE_STRING, description="Description of the group." 
- ), - "expiration": openapi.Schema( - type=openapi.TYPE_STRING, - description="Expiration date and time of the group. Note, " - "this needs to be in a Python DateTime compatible format.", - ), - "max_n_members": openapi.Schema( - type=openapi.TYPE_INTEGER, - description="Maximum number of members to allow in the group.", - ), - }, - description="Groups to create along with associated information.", - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Group Creation Schema", - description="Parameters that are supported when trying to create a group.", - required=["POST_api_groups_create"], - properties={ - "POST_api_groups_create": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_groups_create_schema, - description="Groups and actions to take on them.", - ) - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Group creation is successful.", - 400: "Bad request.", - 403: "Invalid token.", - 409: "Group conflict. There is already a group with this name.", - }, - tags=["Group Management"], - ) - def post(self, request): - """ "Post?""" - return check_post_and_process(request, post_api_groups_create) - - -class ApiGroupsDelete(APIView): - """ - Delete group - - -------------------- - - Deletes one or more groups from the BCO API database. Even if not all - requests are successful, the API can return success. If a 300 response is - returned then the caller should loop through the response to understand - which deletes failed and why. 
- """ - - POST_api_groups_delete_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["names"], - properties={ - "names": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="List of groups to delete.", - items=openapi.Schema(type=openapi.TYPE_STRING), - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Group Deletion Schema", - description="Parameters that are supported when trying to delete " - "one or more groups.", - required=["POST_api_groups_delete"], - properties={"POST_api_groups_delete": POST_api_groups_delete_schema}, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Group deletion is successful.", - 300: "Mixture of successes and failures in a bulk delete.", - 400: "Bad request.", - 403: "Invalid token.", - 404: "Missing optional bulk parameters, this request has no effect.", - 418: "More than the expected one group was deleted.", - }, - tags=["Group Management"], - ) - def post(self, request): - return check_post_and_process(request, post_api_groups_delete) - - -class ApiGroupsModify(APIView): - """Bulk Modify groups - - -------------------- - Modifies one or more existing BCO groups. An array of objects are taken - where each of these objects represents the instructions to modify a - specific group. Within each of these objects, along with the group name, - the set of modifications to that group exists in a dictionary indecated by - the following 'actions': 'rename', 'redescribe', 'add_users', - 'remove_users', and 'owner_user'. - - Example request body which encodes renaming a group named `myGroup1` to - `myGroup2`: - ``` - "POST_api_groups_modify": [ - { - "name": "myGroup1", - "actions": { - "rename": "myGroup2" - } - } - ] - ``` - - More than one action can be included for a specific group name, and more - than one group can be modified with a request. To modify multiple groups - they must each have their own request object. 
- """ - - POST_api_groups_modify_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["name"], - properties={ - "name": openapi.Schema( - type=openapi.TYPE_STRING, description="The name of the group to modify" - ), - "actions": openapi.Schema( - type=openapi.TYPE_OBJECT, - properties={ - "rename": openapi.Schema(type=openapi.TYPE_STRING, description=""), - "redescribe": openapi.Schema( - type=openapi.TYPE_STRING, - description="Change the description of the group to this.", - ), - "owner_user": openapi.Schema( - type=openapi.TYPE_STRING, - description="Change the owner of the group to this user.", - ), - "remove_users": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema(type=openapi.TYPE_STRING), - description="Users to remove from the group.", - ), - "disinherit_from": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema(type=openapi.TYPE_STRING), - description="Groups to disinherit permissions from.", - ), - "add_users": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema(type=openapi.TYPE_STRING), - description="Users to add to the group.", - ), - "inherit_from": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema(type=openapi.TYPE_STRING), - description="Groups to inherit permissions from.", - ), - }, - description="Actions to take upon the group.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Group Modification Schema", - description="Parameters that are supported when trying to modify one or more groups.", - required=["POST_api_groups_modify"], - properties={ - "POST_api_groups_modify": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_groups_modify_schema, - description="Groups and actions to take on them.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Group modification is successful.", - 400: "Bad request.", - 403: "Insufficient privileges.", - }, - tags=["Group Management"], - ) - def 
post(self, request): - return check_post_and_process(request, post_api_groups_modify) - - -class ApiAccountsNew(APIView): - """ - Account creation request - - -------------------- - - Ask for a new account. Sends an e-mail to the provided e-mail, which must - then be clicked to activate the account. - - The account create depends on creation of an account in the associated - user database. The authentication as well as the user database host - information is used to make this request. - - ```JSON - { - "hostname": "http://localhost:8000", - "email": "example_email@example.com", - "token": "eyJ1c2VyX2lkIjoyNCwidXNlcm5hbWUiOiJoYWRsZXlraW5nIiwiZXhwIjoxNjQwNzE5NTUwLCJlbWFpbCI6ImhhZGxleV9raW5nQGd3dS5lZHUiLCJvcmlnX2lhdCI6MTY0MDExNDc1MH0.7G3VPmxUBOWFfu-fMt1_UsWAcH_Gd1DfpQa83EwFwYY" - } - ``` - """ - - # Anyone can ask for a new account - authentication_classes = [] - permission_classes = [] - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Account Creation Schema", - description="Account creation schema description.", - required=["hostname", "email", "token"], - properties={ - "hostname": openapi.Schema( - type=openapi.TYPE_STRING, description="Hostname of the User Database." - ), - "email": openapi.Schema( - type=openapi.TYPE_STRING, description="Email address of user." - ), - "token": openapi.Schema( - type=openapi.TYPE_STRING, - description="Token returned with new user being " - "generated in the User Database.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 201: "Account creation request is successful.", - 400: "Bad request format.", - 409: "Account has already been authenticated or requested.", - }, - tags=["Account Management"], - ) - def post(self, request) -> Response: - print("Request: {}".format(request)) - return check_post_and_process(request, POST_api_accounts_new) - - -class ApiObjectsDraftsCreate(APIView): - """ - Create BCO Draft - - -------------------- - - Creates a new BCO draft object. 
- """ - - POST_api_objects_draft_create_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["prefix", "owner_group", "schema", "contents"], - properties={ - "prefix": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Prefix to use" - ), - "owner_group": openapi.Schema( - type=openapi.TYPE_STRING, description="Group which owns the BCO draft." - ), - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "schema": openapi.Schema( - type=openapi.TYPE_STRING, description="Which schema the BCO satisfies." - ), - "contents": openapi.Schema( - type=openapi.TYPE_OBJECT, - additional_properties=True, - description="Contents of the BCO.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Create BCO Draft Schema", - description="Parameters that are supported when trying to create a draft BCO.", - required=["POST_api_objects_draft_create"], - properties={ - "POST_api_objects_draft_create": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_draft_create_schema, - description="BCO Drafts to create.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Creation of BCO draft is successful.", - 300: "Some requests failed and some succeeded.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_objects_drafts_create) - - -class ApiObjectsDraftsModify(APIView): - """ - Bulk Modify BCO Objects - - -------------------- - - Modifies one or more BCO objects. The BCO objects must be a draft in order - to be modifiable. WARNING: The contents of the BCO will be replaced with - the new contents provided in the request body. 
- """ - - POST_api_objects_drafts_modify_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["object_id", "contents"], - properties={ - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "contents": openapi.Schema( - type=openapi.TYPE_OBJECT, - description="Contents of the BCO.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Modify BCO Draft Schema", - description="Parameters that are supported when trying to modify a draft BCO.", - required=["POST_api_objects_drafts_modify"], - properties={ - "POST_api_objects_drafts_modify": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_drafts_modify_schema, - description="BCO Drafts to modify.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "All modifications of BCO drafts are successful.", - 207: "Some or all BCO modifications failed. Each object submitted" - " will have it's own response object with it's own status" - " code and message:\n" - "200: Success. The object with ID <'object_id'> was" - "updated.\n" - "400: Bad request. The request could not be processed with" - "the parameters provided.\n " - "401: Prefix unauthorized. The token provided does not " - "have draft permissions for this prefix <'prefix'>.\n" - "404: Not Found. The object ID <'object_id'> was not found " - "on the server.\n" - "409: Conflict. The provided object_id <'object_id'> does " - "not match the saved draft object_id <'object_id'>. " - "Once a draft is created you can not change the " - "object_id.\n", - 400: "Bad request.", - 403: "Forbidden. Authentication credentials were not provided, or the token is invalid." 
- }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_objects_drafts_modify) - - -class ApiObjectsDraftsPermissions(APIView): - """ - Get Permissions for a BCO Object - - -------------------- - - Gets the permissions for a BCO object. - """ - - POST_api_objects_drafts_permissions_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["object_id", "contents"], - properties={ - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "contents": openapi.Schema( - type=openapi.TYPE_OBJECT, - additional_properties=True, - description="Contents of the BCO.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Get BCO Permissions Schema", - description="Parameters that are supported when fetching draft BCO permissions.", - required=["POST_api_objects_drafts_permissions"], - properties={ - "POST_api_objects_drafts_permissions": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_drafts_permissions_schema, - description="BCO Drafts to fetch permissions for.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Checking BCO permissions is successful.", - 300: "Some requests failed.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, POST_api_objects_drafts_permissions) - - -class ApiObjectsDraftsPermissionsSet(APIView): - """ - Set Permissions for a BCO Object - - -------------------- - - Sets the permissions for a BCO object. The BCO object must be in draft form. - - NOTE: This is currently a work in progress and may not yet work. 
- """ - - # TODO: The POST_api_objects_draft_permissions_set call needs to be fixed, doesn't appear to work - POST_api_objects_drafts_permissions_set_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["object_id"], - properties={ - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "actions": openapi.Schema( - type=openapi.TYPE_OBJECT, - properties={ - "remove_permissions": openapi.Schema( - type=openapi.TYPE_STRING, - description="Remove permissions from these users.", - ), - "full_permissions": openapi.Schema( - type=openapi.TYPE_STRING, - description="Give users full permissions.", - ), - "add_permissions": openapi.Schema( - type=openapi.TYPE_STRING, - description="Add permissions to these users.", - ), - }, - description="Actions to modify BCO permissions.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Set BCO Permissions Schema", - description="Parameters that are supported when setting draft BCO permissions.", - required=["POST_api_objects_drafts_permissions_set"], - properties={ - "POST_api_objects_drafts_permissions_set": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_drafts_permissions_set_schema, - description="BCO Drafts to set permissions for.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Setting BCO permissions is successful.", - 300: "Some requests failed.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, POST_api_objects_drafts_permissions_set) - - -# TODO: What is the difference between this and ApiObjectsPublish? -class ApiObjectsDraftsPublish(APIView): - """ - Publish a BCO - - -------------------- - - Publish a draft BCO object. Once published, a BCO object becomes immutable. 
- """ - - # TODO: This seems to be missing group, which I would expect to be part of the publication - permission_classes = [IsAuthenticated] - - POST_api_objects_drafts_publish_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["draft_id", "prefix"], - properties={ - "prefix": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Prefix to publish with." - ), - "draft_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object Draft ID." - ), - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "delete_draft": openapi.Schema( - type=openapi.TYPE_BOOLEAN, - description="Whether or not to delete the draft." " False by default.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Publish Draft BCO Schema", - description="Parameters that are supported when setting publishing BCOs.", - required=["POST_api_objects_drafts_publish"], - properties={ - "POST_api_objects_drafts_publish": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_drafts_publish_schema, - description="BCO drafts to publish.", - ) - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "BCO Publication is successful.", - 300: "Some requests failed.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_objects_drafts_publish) - - -class ApiObjectsDraftsRead(APIView): - """ - Read BCO - - -------------------- - - Reads a draft BCO object. - """ - - POST_api_objects_drafts_read_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["object_id"], - properties={ - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." 
- ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Read BCO Schema", - description="Parameters that are supported when reading BCOs.", - required=["POST_api_objects_drafts_read"], - properties={ - "POST_api_objects_drafts_read": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_drafts_read_schema, - description="BCO objects to read.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Read BCO is successful.", - 300: "Some requests failed.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, POST_api_objects_drafts_read) - - -# TODO: This should probably also be a GET (or only a GET) -class ApiObjectsDraftsToken(APIView): - """Get Draft BCOs - - -------------------- - Get all the draft objects for a given token. - You can specify which information should be returned with this. - """ - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Get Draft BCO Schema", - description="Parameters that are supported when fetching a draft BCO.", - required=["POST_api_objects_drafts_token"], - properties={ - "POST_api_objects_drafts_token": openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["fields"], - properties={ - "fields": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema( - type=openapi.TYPE_STRING, - description="Field to return", - enum=[ - "contents", - "last_update", - "object_class", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - ), - description="Fields to return.", - ) - }, - ) - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Fetch BCO drafts is successful.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - # TODO: Not checking for authorization here? 
- # No schema for this request since only - # the Authorization header is required. - return POST_api_objects_drafts_token(rqst=request) - - -class ApiObjectsPublish(APIView): - """Directly publish a BCO - - -------------------- - Take the bulk request and publish objects directly. - """ - - POST_api_objects_publish_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["prefix", "owner_group", "schema", "contents"], - properties={ - "prefix": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Prefix to use" - ), - "owner_group": openapi.Schema( - type=openapi.TYPE_STRING, description="Group which owns the BCO." - ), - "object_id": openapi.Schema( - type=openapi.TYPE_STRING, description="BCO Object ID." - ), - "schema": openapi.Schema( - type=openapi.TYPE_STRING, description="Which schema the BCO satisfies." - ), - "contents": openapi.Schema( - type=openapi.TYPE_OBJECT, - description="Contents of the BCO.", - ), - }, - ) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="BCO Publication Schema", - description="Parameters that are supported when trying to create a published BCO.", - properties={ - "POST_api_objects_publish": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_objects_publish_schema, - description="BCO Drafts to create.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "BCO publication is successful.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_objects_publish) - -class ApiObjectsSearch(APIView): - """ - Search for BCO - - -------------------- - - Search for available BCO objects that match criteria. - - `type` can be one of 3 different values => mine | prefix | bco_id - `search` should be an empty string if you are doing the mine search as that is for "My BCOs" - For prefix `search` should be the name of the prefix. 
- For `bco_id` it should be some substring that is present in the desired `bco_id` or SET of `bco_ids` - - Shell - ```shell - curl -X POST "http://localhost:8000/api/objects/search/" -H "accept: application/json" -H "Authorization: Token ${token}" -H "Content-Type: application/json" -d "{\"POST_api_objects_search\":[{\"type\": \"prefix\",\"search\": \"TEST\"}]}" - ``` - - JavaScript - ```javascript - axios.post("http://localhost:8000/api/objects/search/", { - "POST_api_objects_search":[ - { - "type": "prefix", - "search": "TEST" - } - ] - }, { - headers: { - "Authorization": "Token ${token}, - "Content-Type": "application/json" - } - }); - ``` - """ - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="BCO Search Schema", - description="Search for BCOs", - properties={ - "type": openapi.Schema( - type=openapi.TYPE_STRING, description="Type of search to perform" - ), - "search": openapi.Schema( - type=openapi.TYPE_STRING, description="Search value" - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Search successful.", - 404: "That prefix was not found on this server." - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_objects_search) - - -class ApiObjectsToken(APIView): - """ - Get User Draft and Published BCOs - - -------------------- - - Get all BCOs available for a specific token, including published ones. 
- """ - - # auth = [] - # auth.append( - # openapi.Parameter('Token', openapi.IN_HEADER, description="Authorization Token", type=openapi.TYPE_STRING)) - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Get BCO Schema", - description="Parameters that are supported when fetching a BCOs.", - required=["POST_api_objects_token"], - properties={ - "POST_api_objects_token": openapi.Schema( - type=openapi.TYPE_OBJECT, - required=["fields"], - properties={ - "fields": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=openapi.Schema( - type=openapi.TYPE_STRING, - description="Field to return", - enum=[ - "contents", - "last_update", - "object_class", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ], - ), - description="Fields to return.", - ) - }, - ) - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Fetch BCOs is successful.", - 400: "Bad request.", - 403: "Invalid token.", - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - # No schema for this request since only - # the Authorization header is required. - return POST_api_objects_token(rqst=request) - - -class ApiObjectsPublished(APIView): - """ - Get Published BCOs - - -------------------- - - Get all BCOs available for a specific token, including published ones. - """ - - authentication_classes = [] - permission_classes = [] - auth = [] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "Success.", - 400: "Internal Error. BCO Name and Version are not properly formatted.", - }, - tags=["BCO Management"], - ) - def get(self, request) -> Response: - return POST_api_objects_published() - # return POST_api_objects_token(rqst=request) - - -class ApiPrefixesCreate(APIView): - """ - Create a Prefix - - -------------------- - Create a prefix to be used to classify BCOs and to determine permissions - for objects created under that prefix. 
The requestor *must* be in the group - prefix_admins to create a prefix. - - ```JSON - { - "POST_api_prefixes_create": [ - { - "owner_group": "bco_publisher", - "owner_user": "anon", - "prefixes": [ - { - "description": "Just a test prefix.", - "expiration_date": "2023-01-01-01-01-01", - "prefix": "testR" - }, - { - "description": "Just another prefix.", - "expiration_date": "2023-01-01-01-01-01", - "prefix": "othER" - } - ] - } - ] - } - ``` - """ - - # Permissions - prefix admins only - permission_classes = [RequestorInPrefixAdminsGroup, IsAuthenticated] - - # TYPE_ARRAY explanation - # Source: https://stackoverflow.com/questions/53492889/drf-yasg-doesnt-take-type-array-as-a-valid-type - - # TODO: Need to get the schema that is being sent here from FE - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Prefix Creation Schema", - description="Several parameters are required to create a prefix.", - required=["owner_user", "prefix"], - properties={ - "description": openapi.Schema( - type=openapi.TYPE_STRING, - description="A description of what this prefix should represent. For example, the prefix 'GLY' would be related to BCOs which were derived from GlyGen workflows.", - ), - "expiration_date": openapi.Schema( - type=openapi.TYPE_STRING, - description="The datetime at which this prefix expires in the format YYYY-MM-DD-HH-MM-SS.", - ), - "owner_group": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which group should own the prefix. *The requestor does not have to be in owner_group to assign this.*", - ), - "owner_user": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which user should own the prefix. 
*The requestor does not have to be owner_user to assign this.*", - ), - "prefixes": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="Any prefix which satsifies the naming standard (see link...)", - items=openapi.Items(type=openapi.TYPE_STRING), - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 201: "The prefix was successfully created.", - 400: "Bad request for one of two reasons: \n1) the prefix does not" - "follow the naming standard, or \n2) owner_user and/or" - "owner_group do not exist.", - 401: "Unauthorized. Authentication credentials were not provided.", - 403: "Forbidden. User doesnot have permission to perform this action", - 409: "The prefix the requestor is attempting to create already exists.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_prefixes_create) - - -class ApiPrefixesDelete(APIView): - """ - Delete a Prefix - - # Deletes a prefix for BCOs. - -------------------- - The requestor *must* be in the group prefix_admins to delete a prefix. - - __Any object created under this prefix will have its permissions "locked out." 
This means that any other view which relies on object-level permissions, such as /api/objects/drafts/read/, will not allow any requestor access to particular objects.__ - - ```JSON - { - "POST_api_prefixes_delete": [ - "OTHER", - "TESTR" - ] - } - ``` - - """ - - # Permissions - prefix admins only - permission_classes = [RequestorInPrefixAdminsGroup] - - # TODO: Need to get the schema that is being sent here from FE - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Prefix Deletion Schema", - description="Provide a list of prefixes to delete.", - required=["prefixes"], - properties={ - "prefixes": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="Any prefix in the API.", - items=openapi.Items(type=openapi.TYPE_STRING), - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "Deleting a prefix was successful.", - 401: "Unauthorized. Authentication credentials were not provided.", - 403: "Forbidden. User doesnot have permission to perform this action", - 404: "The prefix couldn't be found so therefore it could not be deleted.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_prefixes_delete) - - -class ApiPrefixesModify(APIView): - """ - Modify a Prefix - - -------------------- - - Modify a prefix which already exists. - - The requestor *must* be in the group prefix_admins to modify a prefix. 
- - ```JSON - { - "POST_api_prefixes_modify": [ - { - "owner_group": "bco_drafter", - "owner_user": "wheel", - "prefixes": [ - { - "description": "Just another description here.", - "expiration_date": "2025-01-01-01-01-01", - "prefix": "testR" - }, - { - "description": "Just another prefix description here as well.", - "expiration_date": "2025-01-01-01-01-01", - "prefix": "othER" - } - ] - } - ] - } - ``` - - """ - - # Permissions - prefix admins only - permission_classes = [RequestorInPrefixAdminsGroup] - prefixes_object_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=[], - properties={ - "description": openapi.Schema( - type=openapi.TYPE_STRING, - description="A description of what this prefix should" - " represent. For example, the prefix 'GLY' would be " - "related to BCOs which were derived from GlyGen workflows.", - ), - "expiration_date": openapi.Schema( - type=openapi.TYPE_STRING, - description="The datetime at which this prefix expires in the" - " format YYYY-MM-DD-HH-MM-SS.", - ), - "prefix": openapi.Schema( - type=openapi.TYPE_STRING, - description="Any prefix which satsifies the naming standard", - ), - }, - ) - POST_api_prefixes_modify_schema = openapi.Schema( - type=openapi.TYPE_OBJECT, - required=[], - properties={ - "owner_group": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which group should own the prefix. *The" - " requestor does not have to be in the owner group to" - " assign this.*", - ), - "owner_user": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which user should own the prefix. 
*The requestor" - " does not have to be owner_user but owner_user must be in" - " owner_group*.", - ), - "prefixes": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=prefixes_object_schema, - description="Any prefix which satsifies the naming standard", - ), - }, - ) - - # TODO: Need to get the schema that is being sent here from FE - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Prefix Modification Schema", - description="Several parameters are required to modify a prefix.", - required=["POST_api_prefixes_modify"], - properties={ - "POST_api_prefixes_modify": openapi.Schema( - type=openapi.TYPE_ARRAY, - items=POST_api_prefixes_modify_schema, - description="", - ) - }, - ) # TODO: ADD LINK FOR PREFIX DOCUMENTATION - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "The prefix was successfully modified.", - 400: "Bad request because owner_user and/or owner_group do not exist.", - 404: "The prefix provided could not be found.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_prefixes_modify) - - -class ApiPrefixesPermissionsSet(APIView): - """ - Set Prefix Permissions - - -------------------- - - # Set prefix permissions by user, group, or both. - - The requestor *must* be the owner_user of the prefix. - - At least one of the usernames or groups must actually exist for a permission to be assigned. 
- - ```JSON - { - "POST_api_prefixes_permissions_set": [ - { - "group": [ - "bco_drafter" - ], - "mode": "add", - "permissions": [ - "change", - "delete", - "view" - ], - "prefixes": [ - "testR", - "BCO" - ], - "username": [ - "some_user" - ] - } - ] - } - ``` - - """ - - # Permissions - prefix admins only - permission_classes = [RequestorInPrefixAdminsGroup] - - # TODO: Need to get the schema that is being sent here from FE - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Prefix Permissions Schema", - description="Set the permissions for a prefix.", - required=["permissions", "prefix"], - properties={ - "group": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which group the permission is being assigned to.", - ), - "mode": openapi.Schema( - type=openapi.TYPE_STRING, - description="Whether to 'add' (append), 'remove' (subtract), or define the 'full_set' of permissions.", - ), - "permissions": openapi.Schema( - type=openapi.TYPE_STRING, description="Which permissions to assign." - ), - "prefix": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which prefix to assign the permissions to.", - ), - "username": openapi.Schema( - type=openapi.TYPE_STRING, - description="Which user the permission is being assigned to.", - ), - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 201: "The prefix permissions were updated succesfully.", - 400: "Bad request because 1) the requestor isn't the owner of the prefix, or 2) the provided username and/or group could not be found.", - 404: "The prefix provided was not found.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_api_prefixes_permissions_set) - - -class ApiPrefixesToken(APIView): - """ - Get list of prefixes - - -------------------- - - Get all available prefixes and their associated permissions for a given token. - The word 'Token' must be included in the header. 
- - For example: 'Token 627626823549f787c3ec763ff687169206626149'. - """ - - auth = [ - openapi.Parameter( - "Authorization", - openapi.IN_HEADER, - description="Authorization Token", - type=openapi.TYPE_STRING, - ) - ] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "The Authorization header was provided and available prefixes were returned.", - 400: "The Authorization header was not provided.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - if "Authorization" in request.headers: - # Pass the request to the handling function - # Source: https://stackoverflow.com/a/31813810 - return post_api_prefixes_token_flat(request=request) - else: - return Response(status=status.HTTP_400_BAD_REQUEST) - - -class ApiPrefixesTokenFlat(APIView): - """ - Get a flat list of prefixes - - -------------------- - - Get all available prefixes and their associated permissions for a given - token in flat format. The word 'Token' must be included in the header. - - For example: 'Token 627626823549f787c3ec763ff687169206626149'. - """ - - auth = [ - openapi.Parameter( - "Authorization", - openapi.IN_HEADER, - description="Authorization Token", - type=openapi.TYPE_STRING, - ) - ] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "The Authorization header was provided and available prefixes were returned.", - 401: "The Authorization header was not provided.", - }, - tags=["Prefix Management"], - ) - def post(self, request) -> Response: - if "Authorization" in request.headers: - # Pass the request to the handling function - # Source: https://stackoverflow.com/a/31813810 - return post_api_prefixes_token_flat(request=request) - else: - return Response(status=status.HTTP_400_BAD_REQUEST) - - -class ApiPublicDescribe(APIView): - """ - Describe API - - -------------------- - - Returns information about the API. 
- - """ - - authentication_classes = [] - permission_classes = [] - - # For the success and error messages - # renderer_classes = [ - # TemplateHTMLRenderer - # ] - # template_name = 'api/account_activation_message.html' - - auth = [] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 201: "Account has been authorized.", - 208: "Account has already been authorized.", - 403: "Requestor's credentials were rejected.", - 424: "Account has not been registered.", - }, - tags=["API Information"], - ) - def get(self, request): - # Pass the request to the handling function - return Response(UserUtils.UserUtils().get_user_info(username="anon")) - - -# Source: https://www.django-rest-framework.org/api-guide/permissions/#setting-the-permission-policy -class DraftObjectId(APIView): - """ - Read Object by URI - - -------------------- - - Reads and returns a single object from a given object_id. - - """ - - auth = [] - auth.append( - openapi.Parameter( - "object_id", - openapi.IN_PATH, - description="Object ID to be viewed.", - type=openapi.TYPE_STRING, - ) - ) - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "Success. Object contents returned", - 401: "The contents of the draft could not be sent back because" - " the requestor does not have appropriate permissions.", - 403: "Forbidden. Authentication credentials were not provided, or" - " the token was invalid.", - 404: "Not found. That draft could not be found on the server." - }, - tags=["BCO Management"], - ) - def get(self, request, object_id): - # No need to check the request (unnecessary for GET as it's checked - # by the url parser?). - - # Pass straight to the handler. - # TODO: This is not dealing with the draft_object_id parameter being passed in? 
- # return GET_draft_object_by_id(do_id=request.build_absolute_uri(), rqst=request) - - # return GET_draft_object_by_id(do_id=draft_object_id, rqst=request) - return get_draft_object_by_id(do_id=object_id, request=request) - - -# Allow anyone to view published objects. -# Source: https://www.django-rest-framework.org/api-guide/permissions/#setting-the-permission-policy -class ObjectIdRootObjectId(APIView): - """ - View Published BCO by ID - -------------------- - Reads and returns a published BCO based on an object ID. This will return the highest versioned object. - """ - - auth = [] - auth.append( - openapi.Parameter( - "object_id_root", - openapi.IN_PATH, - description="Object ID to be viewed.", - type=openapi.TYPE_STRING, - ) - ) - - authentication_classes = [] - permission_classes = [] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 200: "Object returned.", - 404: "Object not found." - }, - tags=["BCO Management"], - ) - def get(self, request, object_id_root): - return GET_published_object_by_id(object_id_root) - -class ObjectIdRootObjectIdVersion(APIView): - """ - View Published BCO by ID and Version - - -------------------- - - Reads and returns a published BCO based on an object ID and a version. 
- - """ - - # For the success and error messages - # renderer_classes = [ - # TemplateHTMLRenderer - # ] - # template_name = 'api/account_activation_message.html' - - auth = [] - auth.append( - openapi.Parameter( - "object_id_root", - openapi.IN_PATH, - description="Object ID to be viewed.", - type=openapi.TYPE_STRING, - ) - ) - auth.append( - openapi.Parameter( - "object_id_version", - openapi.IN_PATH, - description="Object version to be viewed.", - type=openapi.TYPE_STRING, - ) - ) - - # Anyone can view a published object - authentication_classes = [] - permission_classes = [] - - @swagger_auto_schema( - manual_parameters=auth, - responses={ - 201: "Account has been authorized.", - 208: "Account has already been authorized.", - 403: "Requestor's credentials were rejected.", - 424: "Account has not been registered.", - }, - tags=["BCO Management"], - ) - def get(self, request, object_id_root, object_id_version): - return GET_published_object_by_id_with_version( - object_id_root, object_id_version - ) - - -class ValidateBCO(APIView): - """ - Bulk Validate BCOs - - -------------------- - - Bulk operation to validate BCOs. - - ```JSON - { - "POST_validate_bco": [ - {...BCO CONTENTS...}, - {...BCO CONTENTS...} - ] - } - - """ - - authentication_classes = [] - permission_classes = [] - - request_body = openapi.Schema( - type=openapi.TYPE_OBJECT, - title="Validate BCO", - description="Bulk request for validating a BCO", - required=["BCO"], - properties={ - "POST_validate_bco": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="A BCO to validate", - items=openapi.Items(type=openapi.TYPE_OBJECT), - ) - }, - ) - - @swagger_auto_schema( - request_body=request_body, - responses={ - 200: "All BCO validations are successful.", - 207: "Some or all BCO validations failed. 
Each object submitted" - " will have it's own response object with it's own status" - " message:\n" - }, - tags=["BCO Management"], - ) - def post(self, request) -> Response: - return check_post_and_process(request, post_validate_bco) diff --git a/authentication/admin.py b/authentication/admin.py index c3776a21..93348991 100644 --- a/authentication/admin.py +++ b/authentication/admin.py @@ -2,6 +2,13 @@ """ from django.contrib import admin -from authentication.models import Authentication +from authentication.models import Authentication, NewUser -admin.site.register(Authentication) \ No newline at end of file +class AuthenticationAdmin(admin.ModelAdmin): + list_display = ["username", "auth_service"] + +class NewUserAdmin(admin.ModelAdmin): + list_display = ["email", "temp_identifier","token", "hostname", "created"] + +admin.site.register(Authentication, AuthenticationAdmin) +admin.site.register(NewUser, NewUserAdmin) \ No newline at end of file diff --git a/authentication/apis.py b/authentication/apis.py index 49bbed2d..fe7f49bc 100644 --- a/authentication/apis.py +++ b/authentication/apis.py @@ -1,7 +1,10 @@ # authentication/apis.py import json +import jwt +import uuid from django.contrib.auth.models import User +from django.conf import settings from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status, serializers @@ -9,20 +12,47 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView -from api.scripts.utilities.UserUtils import UserUtils -from authentication.selectors import check_user_email, get_user_info -from authentication.services import validate_token, create_bcodb, send_bcodb, validate_auth_service -from authentication.models import Authentication +from authentication.models import Authentication, NewUser +from authentication.selectors import ( + check_user_email, + get_user_info, + check_new_user +) +from 
authentication.services import ( + validate_token, + create_bcodb_user, + send_bcodb, + validate_auth_service, + send_new_user_email +) + +ANON_KEY = settings.ANON_KEY + +AUTH_SCHEMA = { + "iss": openapi.Schema( + type=openapi.TYPE_STRING, + description="The 'iss' (issuer) claim identifies the principal" + " that issued the JWT.", + example="https://example.org" + ), + "sub": openapi.Schema( + type=openapi.TYPE_STRING, + description="The 'sub' (subject) claim identifies the" + " principal that is the subject of the JWT.", + example="0000-0000-0000-0000" + ) +} -class RegisterBcodbAPI(APIView): - """Register BCODB - API View to register a new BCODB user. +class NewAccountApi(APIView): + """ + Account creation request - Methods: - post(request): Register a new BCODB user. + Ask for a new account. Sends an e-mail to the provided e-mail, which must + then be clicked to activate the account. - Attributes: - InputSerializer: Serializer class for validating input data. + The account create depends on creation of an account in the associated + user database. The authentication as well as the user database host + information is used to make this request. """ class InputSerializer(serializers.Serializer): @@ -33,60 +63,224 @@ class InputSerializer(serializers.Serializer): email (str): The email address of the user to register. token (str): The authentication token for the BCODB portal. """ - hostname= serializers.URLField() + email = serializers.EmailField() - token = serializers.CharField() + hostname= serializers.URLField() + token = serializers.CharField(required=False,default='') + + def validate(self, attrs): + attrs['temp_identifier'] = uuid.uuid4().hex + return super().validate(attrs) class Meta: - model = User + model = NewUser fields = ["__all__"] authentication_classes = [] permission_classes = [] - def post(self, request): - """Register a new BCODB user. 
+ @swagger_auto_schema( + operation_id="api_accounts_new", + request_body=openapi.Schema( + type=openapi.TYPE_OBJECT, + title="Account Creation Schema", + description="Account creation schema description.", + required=["hostname", "email"], + properties={ + "hostname": openapi.Schema( + type=openapi.TYPE_STRING, + description="Hostname of the User Database.", + example="http://localhost:8000/" + ), + "email": openapi.Schema( + type=openapi.TYPE_STRING, + description="Email address of user.", + example="test@test.test" + ), + "token": openapi.Schema( + type=openapi.TYPE_STRING, + description="Token returned with new user being " + "generated in the User Database.", + example="testToken123456789" + ), + }, + ), + responses={ + 201: "Account creation request is successful.", + 400: "Bad request format.", + 409: "Account has already been authenticated or requested.", + }, + tags=["Authentication and Account Management"], + ) + + def post(self, request) -> Response: + serializer = self.InputSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + email = serializer.validated_data['email'] + if email == "test@test.test": + return Response( + status=status.HTTP_201_CREATED, + data={ + "message":"Testing account request successful. Check" \ + + " your email for the activation link." + } + ) + + if check_user_email(email) is True: + return Response( + status=status.HTTP_409_CONFLICT, + data={ + "message":f"CONFLICT: That account, {email}, has already "\ + + "been requested. Please contact an admin with further "\ + + "questions." + } + ) - Args: - request (Request): The request object containing the input data. + if check_new_user(email) is True: + return Response( + status=status.HTTP_409_CONFLICT, + data={ + "message": f"That account, {email}, has already been "\ + + "requested. Please contact an admin with further questions." 
+ } + ) + + try: + send_new_user_email(serializer.validated_data) + return Response( + status=status.HTTP_201_CREATED, + data={"message":"Account request granted. Check your email"\ + + " for an activation link."} + ) + except Exception as error: + return Response( + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + data={"message": str(error)} + ) - Returns: - Response: A HTTP response indicating the result of the registration attempt. - """ +class AccountActivateApi(APIView): + """ + Activate an account - user_info = self.InputSerializer(data=request.data) - user_info.is_valid(raise_exception=True) - token = user_info.validated_data['token'] - url = user_info.validated_data['hostname'] - if validate_token(token, url) is False: - return Response(status=status.HTTP_401_UNAUTHORIZED, data={"message": "portal authentication was invalid"}) - if check_user_email(user_info.validated_data['email']) is True: + -------------------- + + This endpoint is a GET request to activate a new account. + To activate an account during registration the user will receive an email + or a temporary identifier to authenticate and activate account. This + endpoint will check the validity of the provided temporary identifier for + a specific user account. This is open to anyone to activate a new account, + as long as they have a valid token generated by this host. This can allow + other users to act as the verification layer in addition to the system. 
+ """ + + authentication_classes = [] + permission_classes = [] + + @swagger_auto_schema( + operation_id="api_accounts_activate", + manual_parameters=[ + openapi.Parameter( + "email", + openapi.IN_PATH, + description="Email to be authenticated.", + type=openapi.TYPE_STRING, + default="test@test.test" + ), + openapi.Parameter( + "temp_identifier", + openapi.IN_PATH, + description="The temporary identifier sent", + type=openapi.TYPE_STRING, + default="testTempIdentifier123456789" + ) + ], + responses={ + 200: "Account has been activated.", + 403: "Requestor's credentials were rejected.", + 404: "That account, {email}, was not found.", + 409: "CONFLICT: That account, {email}, has already been activated" + }, + tags=["Authentication and Account Management"], + ) + + def get(self, request, email: str, temp_identifier: str) -> Response: + if email == "test@test.test": + return Response( + status=status.HTTP_200_OK, + data={"message":f"Account for {email} has been activated"} + ) + if check_user_email(email) is True: return Response( status=status.HTTP_409_CONFLICT, - data={"message": "A BCODB account with that email already exists"} + data={ + "message":f"CONFLICT: That account, {email}, has already "\ + + "been activated." 
+ } + ) + if check_new_user(email) == False: + return Response( + status=status.HTTP_404_NOT_FOUND, + data={ + "message":f"That account, {email}, was not found."\ + } + ) + try: + new_user = NewUser.objects.get( + email=email, + temp_identifier=temp_identifier + ) + create_bcodb_user(new_user.email) + new_user.delete() + return Response( + status=status.HTTP_200_OK, + data={"message":f"Account for {email} has been activated"} + ) + except NewUser.DoesNotExist: + return Response( + status=status.HTTP_403_FORBIDDEN, + data={"message": "Requestor's credentials were rejected."} ) - user = create_bcodb(user_info=user_info.validated_data) - data = json.dumps(get_user_info(user), default=str) - response = send_bcodb( - data=data, request_info=user_info.validated_data - ) - if response.status_code == 200: - return Response(status=status.HTTP_201_CREATED, data={"message": "user account created"}) -class AuthenticationInputSerializer(serializers.Serializer): - auth_service = serializers.JSONField(validators=[validate_auth_service]) +class AccountDescribeApi(APIView): + """ + Account details - class Meta: - model = Authentication - fields = ['username', 'auth_service'] + -------------------- + The word 'Token' or 'Bearer' must be included in the header. + For example: 'Token 627626823549f787c3ec763ff687169206626149' + 'Bearer' indicates a JWT that will be verified with another service. + 'Token' is the API token for this service. + """ + @swagger_auto_schema( + operation_id="api_accounts_describe", + manual_parameters=[ + openapi.Parameter( + "Authorization", + openapi.IN_HEADER, + description="Authorization Token", + type=openapi.TYPE_STRING, + default="Token 627626823549f787c3ec763ff687169206626149" + ) + ], + responses={ + 200: "Authorization is successful.", + 403: "Forbidden. 
Authentication credentials were not provided.", + 403: "Invalid token" + }, + tags=["Authentication and Account Management"], + ) + + def post(self, request): + user = request._user + user_info = get_user_info(user) + + return Response(status=status.HTTP_200_OK, data=user_info) class AddAuthenticationApi(APIView): """ Add Authentication Object - ----------------------------- - Adds an authentication dictionary to the list of auth_objects for a user ```JSON @@ -97,31 +291,24 @@ class AddAuthenticationApi(APIView): ``` """ + class InputSerializer(serializers.Serializer): + auth_service = serializers.JSONField(validators=[validate_auth_service]) + + class Meta: + model = Authentication + fields = ['username', 'auth_service'] + permission_classes = [IsAuthenticated,] - schema = openapi.Schema( + @swagger_auto_schema( + operation_id="api_auth_add", + request_body=openapi.Schema( type=openapi.TYPE_OBJECT, title="Add Authentication", description="Adds an authentication objetc to the associated user", required=["iss", "sub"], - properties={ - "iss": openapi.Schema( - type=openapi.TYPE_STRING, - description="The 'iss' (issuer) claim identifies the principal" - " that issued the JWT." 
- ), - "sub": openapi.Schema( - type=openapi.TYPE_STRING, - description="The 'sub' (subject) claim identifies the" - " principal that is the subject of the JWT.", - - ) - } - - ) - - @swagger_auto_schema( - request_body=schema, + properties=AUTH_SCHEMA + ), responses={ 200: "New authentication credentials added to existing object.", 201: "Authentication object created and added to account.", @@ -129,15 +316,26 @@ class AddAuthenticationApi(APIView): 403: "Authentication credentials were not provided.", 409: "That object already exists for this account.", }, - tags=["Authentication"], + tags=["Authentication and Account Management"], ) def post(self, request): - """ - """ + demo_data = { + 'iss': 'https://example.org', + 'sub': '0000-0000-0000-0000' + } + + if request.data == demo_data: + return Response( + status=status.HTTP_200_OK, + data={ + "message": "TESTING: "\ + +"New authentication credentials added to existing object" + } + ) result = validate_auth_service(request.data) - if result != 1: + if result["message"] != "valid": return Response(status=status.HTTP_400_BAD_REQUEST, data=result) try: auth_object = Authentication.objects.get(username=request.user.username) @@ -158,7 +356,7 @@ def post(self, request): username=request.user, auth_service=[request.data] ) - print('status=status.HTTP_201_CREATED') + return Response( status=status.HTTP_201_CREATED, data={"message": "Authentication object created and added to account"} @@ -174,8 +372,6 @@ class RemoveAuthenticationApi(APIView): """ Removes Authentication Object - ----------------------------- - Removes an authentication dictionary to the list of auth_objects for a user ```JSON @@ -187,44 +383,41 @@ class RemoveAuthenticationApi(APIView): """ permission_classes = [IsAuthenticated,] - schema = openapi.Schema( + @swagger_auto_schema( + operation_id="api_auth_remove", + request_body=openapi.Schema( type=openapi.TYPE_OBJECT, title="Remove Authentication", description="Removess an authentication objetc to the 
associated user", required=["iss", "sub"], - properties={ - "iss": openapi.Schema( - type=openapi.TYPE_STRING, - description="The 'iss' (issuer) claim identifies the principal" - " that issued the JWT." - ), - "sub": openapi.Schema( - type=openapi.TYPE_STRING, - description="The 'sub' (subject) claim identifies the" - " principal that is the subject of the JWT.", - - - ) - } - - ) - - @swagger_auto_schema( - request_body=schema, + properties=AUTH_SCHEMA + ), responses={ 200: "Remove authentication is successful.", 403: "Authentication failed.", 404: "That object does not exist for this account.", }, - tags=["Authentication"], + tags=["Authentication and Account Management"], ) def post(self, request): - """""" + demo_data = { + 'iss': 'https://example.org', + 'sub': '0000-0000-0000-0000' + } + + if request.data == demo_data: + return Response( + status=status.HTTP_200_OK, + data={ + "message": "TESTING: "\ + +"Authentication object removed" + } + ) result = validate_auth_service(request.data) - if result != 1: + if result["message"] != "valid": return Response( status=status.HTTP_403_FORBIDDEN, data=result @@ -249,43 +442,112 @@ def post(self, request): ) class ResetTokenApi(APIView): - """Reset Token - ----------------------------- - Resets the user's token and returns the new one. + """Reset API Token + + Revokes the user's current API token and returns a new one. 
""" permission_classes = [IsAuthenticated,] - - # schema = openapi.Schema() - - auth = [ - openapi.Parameter( - "Authorization", - openapi.IN_HEADER, - description="Authorization Token", - type=openapi.TYPE_STRING, - ) - ] @swagger_auto_schema( - manual_parameters=auth, + operation_id="api_auth_reset_token", + manual_parameters=[ + openapi.Parameter( + "Authorization", + openapi.IN_HEADER, + description="Authorization Token", + type=openapi.TYPE_STRING, + default=f"Token {ANON_KEY}" + ) + ], responses={ 200: "Token reset is successful.", 403: "Invalid token.", }, - tags=["Authentication"], + tags=["Authentication and Account Management"], ) def post(self, request): try: token = Token.objects.get(user=request.user) + if token.key == ANON_KEY: + return Response( + status=status.HTTP_200_OK, + data=get_user_info(user=request.user) + ) token.delete() Token.objects.create(user=request.user) return Response( status=status.HTTP_200_OK, - data=UserUtils().get_user_info(username=request.user) + data=get_user_info(user=request.user) ) except Exception as error: - return Response(status=status.HTTP_400_BAD_REQUEST, data={"message": f"{error}"}) - \ No newline at end of file + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={"message": f"{error}"} + ) + +class RegisterUserNoVerificationAPI(APIView): + """Register BCODB + API View to register a new BCODB user with out an email verification step. + + Methods: + post(request): Register a new BCODB user. + + Attributes: + InputSerializer: Serializer class for validating input data. + """ + + class InputSerializer(serializers.Serializer): + """Serializer class for validating input data for registering a new BCODB user. + + Fields: + hostname (str): The URL of the BCODB portal. + email (str): The email address of the user to register. + token (str): The authentication token for the BCODB portal. 
+ """ + hostname= serializers.URLField() + email = serializers.EmailField() + token = serializers.CharField() + + class Meta: + model = User + fields = ["__all__"] + + authentication_classes = [] + permission_classes = [] + swagger_schema = None + + def post(self, request): + """Register a new BCODB user. + + Args: + request (Request): The request object containing the input data. + + Returns: + Response: A HTTP response indicating the result of the registration attempt. + """ + + user_info = self.InputSerializer(data=request.data) + user_info.is_valid(raise_exception=True) + token = user_info.validated_data['token'] + url = user_info.validated_data['hostname'] + email = user_info.validated_data['email'] + if validate_token(token, url) is False: + return Response(status=status.HTTP_401_UNAUTHORIZED, data={"message": "portal authentication was invalid"}) + if check_user_email(email) is True: + return Response( + status=status.HTTP_409_CONFLICT, + data={"message": "A BCODB account with that email already exists"} + ) + user = create_bcodb_user(email) + data = json.dumps(get_user_info(user), default=str) + + response = send_bcodb( + data=data, request_info=user_info.validated_data + ) + if response.status_code == 200: + return Response(status=status.HTTP_201_CREATED, data={"message": "user account created"}) + + return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR) diff --git a/authentication/migrations/0001_initial.py b/authentication/migrations/0001_initial.py index 575366d2..25e51bfd 100644 --- a/authentication/migrations/0001_initial.py +++ b/authentication/migrations/0001_initial.py @@ -1,8 +1,9 @@ -# Generated by Django 3.2.10 on 2023-03-27 20:46 +# Generated by Django 3.2.13 on 2024-04-11 21:18 from django.conf import settings from django.db import migrations, models import django.db.models.deletion +import django.utils.timezone class Migration(migrations.Migration): @@ -14,6 +15,17 @@ class Migration(migrations.Migration): ] operations = [ + 
migrations.CreateModel( + name='NewUser', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('email', models.EmailField(max_length=254)), + ('temp_identifier', models.TextField(max_length=100)), + ('token', models.TextField(blank=True, null=True)), + ('hostname', models.TextField(blank=True, null=True)), + ('created', models.DateTimeField(default=django.utils.timezone.now)), + ], + ), migrations.CreateModel( name='Authentication', fields=[ diff --git a/authentication/models.py b/authentication/models.py index 57f4037f..c22333c8 100644 --- a/authentication/models.py +++ b/authentication/models.py @@ -1,13 +1,29 @@ -import json +#!/usr/bin/env python3 + from django.db import models from django.contrib.auth.models import User +from django.utils import timezone class Authentication(models.Model): - """""" + """Authentication Object + """ + username = models.ForeignKey(User, on_delete=models.CASCADE, to_field="username") auth_service = models.JSONField(default=list) - def __username__(self): """String for representing the model in Admin site.""" return str(self.username) + +class NewUser(models.Model): + """New User + For registering new users. 
+ Instead of using the User model, just use + a crude table to store the temporary information + when someone asks for a new account.""" + + email = models.EmailField() + temp_identifier = models.TextField(max_length=100) + token = models.TextField(blank=True, null=True) + hostname = models.TextField(blank=True, null=True) + created = models.DateTimeField(default=timezone.now) \ No newline at end of file diff --git a/authentication/selectors.py b/authentication/selectors.py index df2656cb..ae82c522 100644 --- a/authentication/selectors.py +++ b/authentication/selectors.py @@ -2,10 +2,13 @@ import jwt from django.conf import settings -from django.contrib.auth.models import User, Permission -from authentication.models import Authentication +from django.contrib.auth.models import User +from django.db.models import Q +from authentication.models import Authentication, NewUser from rest_framework.authtoken.models import Token - +from prefix.selectors import get_user_prefixes +from prefix.models import Prefix +from biocompute.selectors import get_authorized_bcos def get_anon()-> User: """Get AnonymosUser @@ -38,12 +41,25 @@ def check_user_email(email: str)-> bool: Using the provided email check for a user in the DB """ + try: if User.objects.get(email=email): return True except User.DoesNotExist: return False +def check_new_user(email: str) -> bool: + """Check for new user + + Using the provided email check for a new user in the DB. + """ + + try: + NewUser.objects.get(email=email) + return True + except NewUser.DoesNotExist: + return False + def get_user_info(user: User) -> dict: """Get User Info @@ -56,34 +72,18 @@ def get_user_info(user: User) -> dict: A dict with the user information. 
""" - token = Token.objects.get(user=user.pk) - other_info = { - "permissions": {}, - "account_creation": "", - "account_expiration": "", - } - user_perms = {"user": [], "groups": []} - - for permission in user.user_permissions.all(): - if permission.name not in user_perms["user"]: - user_perms["user"].append(permission.name) - - for group in user.groups.all(): - if group.name not in user_perms["groups"]: - user_perms["groups"].append(group.name) - for permission in Permission.objects.filter(group=group): - if permission.name not in user_perms["user"]: - user_perms["user"].append(permission.name) - - other_info["permissions"] = user_perms - - other_info["account_creation"] = user.date_joined - return { - "hostname": settings.ALLOWED_HOSTS[0], + "hostname": settings.HOSTNAME, "human_readable_hostname": settings.HUMAN_READABLE_HOSTNAME, "public_hostname": settings.PUBLIC_HOSTNAME, - "token": token.key, + "token": Token.objects.get(user=user.pk).key, "username": user.username, - "other_info": other_info, + "permissions": { + "owned_prefixes": Prefix.objects.filter( + Q(owner=user) + ).values_list('prefix', flat=True).distinct(), + "permissions": get_user_prefixes(user), + "BCOs": get_authorized_bcos(user) + }, + "account_creation": user.date_joined } \ No newline at end of file diff --git a/authentication/services.py b/authentication/services.py index 2a27c67a..e73b6101 100644 --- a/authentication/services.py +++ b/authentication/services.py @@ -4,8 +4,12 @@ import json import requests import jsonschema +from django.db import transaction +from django.conf import settings from django.contrib.auth.models import User, Group +from django.core.mail import send_mail from rest_framework import exceptions, status, serializers +from rest_framework.authtoken.models import Token from rest_framework.response import Response from rest_framework_jwt.authentication import BaseAuthentication from rest_framework_jwt.settings import api_settings @@ -13,48 +17,71 @@ from google.oauth2 
import id_token from google.auth.transport import requests as g_requests from authentication.selectors import get_anon -from authentication.models import Authentication +from authentication.models import Authentication, NewUser +ANON_KEY = settings.ANON_KEY jwt_decode_handler = api_settings.JWT_DECODE_HANDLER class CustomJSONWebTokenAuthentication(BaseAuthentication): - + """ + Custom JSON Web Token Authentication class that supports different types + of tokens including Bearer tokens from various issuers like ORCID, Google, + and the BioCompute Portal. + + Methods: + authenticate(self, request): + Authenticates the request based on the 'Authorization' header containing + either 'Bearer' or 'Token' type credentials. + + Raises: + AuthenticationFailed: If the token is invalid, expired, or the issuer is not recognized. + """ + def authenticate(self, request): if 'Authorization' in request.headers: type, token = request.headers['Authorization'].split(' ') if type == 'Bearer': if token == "null": - token = "627626823549f787c3ec763ff687169206626149" + token = ANON_KEY user = get_anon() - return (user, token) + try: unverified_payload = jwt.decode(token, None, False) except Exception as exp: raise exceptions.AuthenticationFailed(exp) - + user = None if unverified_payload['iss'] == 'https://orcid.org' or unverified_payload['iss'] == 'https://sandbox.orcid.org': user = authenticate_orcid(unverified_payload, token) if unverified_payload['iss'] == 'accounts.google.com': user = authenticate_google(token) if unverified_payload['iss'] in ['http://localhost:8080', 'https://test.portal.biochemistry.gwu.edu', 'https://biocomputeobject.org']: user = authenticate_portal(unverified_payload, token) - try: + if user: return (user, token) - except UnboundLocalError as exp: + else: raise exceptions.AuthenticationFailed("Authentication failed. Token issuer not found. 
Please contact the site admin") - if type == 'Token' or type == 'TOKEN': - pass pass def authenticate_portal(payload: dict, token:str)-> User: """Authenticate Portal - Custom function to authenticate BCO Portal credentials. - """ + + Authenticates a user for the BioCompute Portal using a JWT payload and token. + Args: + payload (dict): The JWT payload. + token (str): The authentication token. + + Returns: + User: The authenticated user object or None if authentication fails. + + Raises: + AuthenticationFailed: If the token verification fails or the user does not exist. + """ + response = requests.post( payload['iss']+'/users/auth/verify/', json={"token":token} ) @@ -64,35 +91,62 @@ def authenticate_portal(payload: dict, token:str)-> User: except User.DoesNotExist: return None else: - exceptions.AuthenticationFailed(response.reason) + raise exceptions.AuthenticationFailed(response.reason) def validate_auth_service(value): - schema = { - "type": "object", - "required": ["iss", "sub"], - "additionalProperties": False, - "properties": { - "iss": { - "type": "string", - "description": "The 'iss' (issuer) claim identifies the principal that issued the JWT." - }, - "sub": { - "type": "string", - "description": "The 'sub' (subject) claim identifies the principal that is the subject of the JWT." - } + """ + Validates a JWT against a defined JSON schema to ensure it includes + mandatory 'iss' and 'sub' claims. + + Args: + value (dict): The JWT claims to validate. + + Returns: + int: Returns 1 if validation is successful, or a dictionary containing + error message if failed. + + Raises: + ValidationError: If the JWT does not conform to the expected schema. + """ + + schema = { + "type": "object", + "required": ["iss", "sub"], + "additionalProperties": False, + "properties": { + "iss": { + "type": "string", + "description": "The 'iss' (issuer) claim identifies the principal that issued the JWT." 
+ }, + "sub": { + "type": "string", + "description": "The 'sub' (subject) claim identifies the principal that is the subject of the JWT." } } - try: - jsonschema.validate(value, schema) - except jsonschema.ValidationError as error: - data = {"message": error.message} - return data - return 1 + } + try: + jsonschema.validate(value, schema) + except jsonschema.ValidationError as error: + data = {"message": error.message} + return data + return {"message": "valid"} def authenticate_orcid(payload:dict, token:str)-> User: """Authenticate ORCID - Custom function to authenticate ORCID credentials. + Authenticates a user based on ORCID credentials using a JWT payload and + token. + + Args: + payload (dict): The JWT payload. + token (str): The authentication token. + + Returns: + User: The authenticated user object or None if authentication fails. + + Raises: + AuthenticationFailed: If JWT verification fails or the user is not + found. """ orcid_jwks = { @@ -105,8 +159,8 @@ def authenticate_orcid(payload:dict, token:str)-> User: try: jwt.decode(token, key=orcid_key, algorithms=['RS256'], audience=['APP-88DEA42BRILGEHKC', 'APP-ZQZ0BL62NV9SBWAX']) except Exception as exp: - print('exp:', exp) raise exceptions.AuthenticationFailed(exp) + try: user = User.objects.get(username=Authentication.objects.get(auth_service__icontains=payload['sub']).username) except (Authentication.DoesNotExist, User.DoesNotExist): @@ -116,7 +170,16 @@ def authenticate_orcid(payload:dict, token:str)-> User: def authenticate_google(token: str) -> bool: """Authenticate Google - Custom function to authenticate Google credentials. + Authenticates a user based on Google credentials using an authentication token. + + Args: + token (str): The Google authentication token. + + Returns: + bool: True if the user is authenticated, False otherwise. + + Raises: + AuthenticationFailed: If Google verification fails or the user does not exist. 
""" idinfo = id_token.verify_oauth2_token(token, g_requests.Request()) try: @@ -124,17 +187,15 @@ def authenticate_google(token: str) -> bool: except User.DoesNotExist: return None -def custom_jwt_handler(token, user=None, request=None, public_key=None): - """Custom JWT Handler - Triggered by any user authentication. This will gater all the associated - user information and return that along with the validated JWT - """ +def validate_token(token: str, url: str)-> bool: + """Validate BCO Portal token - print('hadley', token) - return request + Args: + token (str): The authentication token to be validated. + url (str): The base URL of the authentication service where the token will be verified. -def validate_token(token: str, url: str)-> bool: - """ + Returns: + bool: True if the token is successfully validated (response status code 201), False otherwise. """ headers = {"Content-type": "application/json; charset=UTF-8",} @@ -149,26 +210,66 @@ def validate_token(token: str, url: str)-> bool: return False return True +@transaction.atomic +def send_new_user_email(user_info: dict) -> 0: + """Send New User Email + + New BCODB user authentication email + """ + + activation_link = str( + settings.PUBLIC_HOSTNAME + + "/api/accounts/activate/" + + user_info['email'] + + "/" + + user_info['temp_identifier'] + ) + + send_mail( + subject="Registration for BioCompute Portal", + message="Testing.", + html_message='

Please click this link within the next' \ + + ' 24 hours to activate your BioCompute Portal account: ' \ + + f'{activation_link}' \ + + '.

', + from_email="mail_sender@portal.aws.biochemistry.gwu.edu", + recipient_list=[user_info['email']], + fail_silently=False, + ) + NewUser.objects.create(**user_info) + print("Email signal sent") + return 0 -def create_bcodb(user_info: dict) -> User: +@transaction.atomic +def create_bcodb_user(email: str) -> User: """Create BCODB user """ - username = user_info["email"].split("@")[0] + username = email.split("@")[0] user = User.objects.create_user( - username=username, email=user_info["email"] + username=username, email=email ) user.set_unusable_password() user.full_clean() + Token.objects.create(user=user) user.save() - user.groups.add(Group.objects.get(name="bco_drafter")) - user.groups.add(Group.objects.get(name="bco_publisher")) - + return user - def send_bcodb(data: str, request_info: dict): - """ + """Send activation email + + The function constructs an activation link and sends it to the new user's + email address useing Django's send_mail function. + The function is wrapped in a `transaction`, ensuring that all database + changes are rolled back if any part of the function fails. + + Args: + user_info (dict): A dictionary containing the user's email and a + temporary identifier for account activation. + + Returns: + 0: Indicates successful execution of the function. 
""" token = request_info['token'] diff --git a/authentication/urls.py b/authentication/urls.py index e5a62ff5..a9e628c2 100644 --- a/authentication/urls.py +++ b/authentication/urls.py @@ -1,10 +1,27 @@ # authentication/urls.py from django.urls import path -from authentication.apis import RegisterBcodbAPI, AddAuthenticationApi, RemoveAuthenticationApi, ResetTokenApi +from rest_framework_jwt.views import obtain_jwt_token, verify_jwt_token +from authentication.apis import ( + NewAccountApi, + AccountActivateApi, + RegisterUserNoVerificationAPI, + AccountDescribeApi, + AddAuthenticationApi, + RemoveAuthenticationApi, + ResetTokenApi +) urlpatterns = [ - path("auth/register/", RegisterBcodbAPI.as_view()), + # path("token/", obtain_jwt_token), + # path("verify/", verify_jwt_token), + path( + "accounts/activate//", + AccountActivateApi.as_view(), + ), + path("accounts/describe/", AccountDescribeApi.as_view()), + path("accounts/new/", NewAccountApi.as_view()), + path("auth/register/", RegisterUserNoVerificationAPI.as_view()), path("auth/add/", AddAuthenticationApi.as_view()), path("auth/remove/", RemoveAuthenticationApi.as_view()), path("auth/reset_token/", ResetTokenApi.as_view()) diff --git a/bcodb/fixtures/local_data.json b/bcodb/fixtures/local_data.json deleted file mode 100644 index 7550d9ae..00000000 --- a/bcodb/fixtures/local_data.json +++ /dev/null @@ -1,6544 +0,0 @@ -[ - - { - "model": "authtoken.token", - "pk": "2f2a599026581c158a07f968c56292c77f4be875", - "fields": { - "user": 2, - "created": "2023-10-11T14:53:20.557Z" - } - }, - { - "model": "authtoken.token", - "pk": "258220666ca9e667e560b42a1eb4d4080fc1c744", - "fields": { - "user": 5, - "created": "2023-10-11T14:53:20.560Z" - } - }, - { - "model": "authtoken.token", - "pk": "44d2669cfca52239ef4d97e1a7cfdd69d4eeb95e", - "fields": { - "user": 4, - "created": "2023-10-11T14:53:20.559Z" - } - }, - { - "model": "authtoken.token", - "pk": "627626823549f787c3ec763ff687169206626149", - "fields": { - "user": 3, - 
"created": "2023-10-11T14:53:20.558Z" - } - }, - { - "model": "authtoken.token", - "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", - "fields": { - "user": 8, - "created": "2023-10-11T14:53:20.564Z" - } - }, - { - "model": "authtoken.token", - "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", - "fields": { - "user": 7, - "created": "2023-10-11T14:53:20.563Z" - } - }, - { - "model": "authtoken.token", - "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", - "fields": { - "user": 6, - "created": "2023-10-11T14:53:20.561Z" - } - }, - { - "model": "authtoken.token", - "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", - "fields": { - "user": 9, - "created": "2023-10-11T14:53:20.565Z" - } - }, - { - "model": "admin.logentry", - "pk": 1, - "fields": { - "action_time": "2022-06-28T23:06:35.693Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://localhost:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 2, - "fields": { - "action_time": "2022-06-28T23:08:10.571Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://localhost:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 3, - "fields": { - "action_time": "2022-06-28T23:09:47.922Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://localhost:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 4, - "fields": { - "action_time": "2022-06-28T23:12:37.828Z", - "user": 6, - "content_type": 10, - "object_id": "2", - "object_repr": "http://localhost:8000/BCO_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 5, - 
"fields": { - "action_time": "2022-06-28T23:14:01.431Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://localhost:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 6, - "fields": { - "action_time": "2022-06-28T23:16:50.236Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://localhost:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 7, - "fields": { - "action_time": "2022-06-28T23:19:25.710Z", - "user": 6, - "content_type": 10, - "object_id": "3", - "object_repr": "http://localhost:8000/BCO_000002/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 8, - "fields": { - "action_time": "2022-06-28T23:21:05.713Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "http://localhost:8000/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 9, - "fields": { - "action_time": "2022-06-28T23:21:43.425Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "http://localhost:8000/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 10, - "fields": { - "action_time": "2022-06-28T23:23:00.080Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "http://localhost:8000/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 11, - "fields": { - "action_time": "2022-06-28T23:23:13.087Z", - "user": 6, - "content_type": 10, - "object_id": 
"5", - "object_repr": "http://localhost:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 12, - "fields": { - "action_time": "2022-06-28T23:41:21.155Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "http://localhost:8000/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 13, - "fields": { - "action_time": "2022-06-28T23:43:57.562Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://localhost:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 14, - "fields": { - "action_time": "2022-06-28T23:44:43.690Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://localhost:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "auth.permission", - "pk": 1, - "fields": { - "name": "Can add log entry", - "content_type": 1, - "codename": "add_logentry" - } - }, - { - "model": "auth.permission", - "pk": 2, - "fields": { - "name": "Can change log entry", - "content_type": 1, - "codename": "change_logentry" - } - }, - { - "model": "auth.permission", - "pk": 3, - "fields": { - "name": "Can delete log entry", - "content_type": 1, - "codename": "delete_logentry" - } - }, - { - "model": "auth.permission", - "pk": 4, - "fields": { - "name": "Can view log entry", - "content_type": 1, - "codename": "view_logentry" - } - }, - { - "model": "auth.permission", - "pk": 5, - "fields": { - "name": "Can add permission", - "content_type": 2, - "codename": "add_permission" - } - }, - { - "model": "auth.permission", - "pk": 6, - "fields": { - "name": "Can change permission", - "content_type": 2, - 
"codename": "change_permission" - } - }, - { - "model": "auth.permission", - "pk": 7, - "fields": { - "name": "Can delete permission", - "content_type": 2, - "codename": "delete_permission" - } - }, - { - "model": "auth.permission", - "pk": 8, - "fields": { - "name": "Can view permission", - "content_type": 2, - "codename": "view_permission" - } - }, - { - "model": "auth.permission", - "pk": 9, - "fields": { - "name": "Can add group", - "content_type": 3, - "codename": "add_group" - } - }, - { - "model": "auth.permission", - "pk": 10, - "fields": { - "name": "Can change group", - "content_type": 3, - "codename": "change_group" - } - }, - { - "model": "auth.permission", - "pk": 11, - "fields": { - "name": "Can delete group", - "content_type": 3, - "codename": "delete_group" - } - }, - { - "model": "auth.permission", - "pk": 12, - "fields": { - "name": "Can view group", - "content_type": 3, - "codename": "view_group" - } - }, - { - "model": "auth.permission", - "pk": 13, - "fields": { - "name": "Can add user", - "content_type": 4, - "codename": "add_user" - } - }, - { - "model": "auth.permission", - "pk": 14, - "fields": { - "name": "Can change user", - "content_type": 4, - "codename": "change_user" - } - }, - { - "model": "auth.permission", - "pk": 15, - "fields": { - "name": "Can delete user", - "content_type": 4, - "codename": "delete_user" - } - }, - { - "model": "auth.permission", - "pk": 16, - "fields": { - "name": "Can view user", - "content_type": 4, - "codename": "view_user" - } - }, - { - "model": "auth.permission", - "pk": 17, - "fields": { - "name": "Can add content type", - "content_type": 5, - "codename": "add_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 18, - "fields": { - "name": "Can change content type", - "content_type": 5, - "codename": "change_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 19, - "fields": { - "name": "Can delete content type", - "content_type": 5, - "codename": "delete_contenttype" - } - }, - { 
- "model": "auth.permission", - "pk": 20, - "fields": { - "name": "Can view content type", - "content_type": 5, - "codename": "view_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 21, - "fields": { - "name": "Can add session", - "content_type": 6, - "codename": "add_session" - } - }, - { - "model": "auth.permission", - "pk": 22, - "fields": { - "name": "Can change session", - "content_type": 6, - "codename": "change_session" - } - }, - { - "model": "auth.permission", - "pk": 23, - "fields": { - "name": "Can delete session", - "content_type": 6, - "codename": "delete_session" - } - }, - { - "model": "auth.permission", - "pk": 24, - "fields": { - "name": "Can view session", - "content_type": 6, - "codename": "view_session" - } - }, - { - "model": "auth.permission", - "pk": 25, - "fields": { - "name": "Can add Token", - "content_type": 7, - "codename": "add_token" - } - }, - { - "model": "auth.permission", - "pk": 26, - "fields": { - "name": "Can change Token", - "content_type": 7, - "codename": "change_token" - } - }, - { - "model": "auth.permission", - "pk": 27, - "fields": { - "name": "Can delete Token", - "content_type": 7, - "codename": "delete_token" - } - }, - { - "model": "auth.permission", - "pk": 28, - "fields": { - "name": "Can view Token", - "content_type": 7, - "codename": "view_token" - } - }, - { - "model": "auth.permission", - "pk": 29, - "fields": { - "name": "Can add token", - "content_type": 8, - "codename": "add_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 30, - "fields": { - "name": "Can change token", - "content_type": 8, - "codename": "change_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 31, - "fields": { - "name": "Can delete token", - "content_type": 8, - "codename": "delete_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 32, - "fields": { - "name": "Can view token", - "content_type": 8, - "codename": "view_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 33, - 
"fields": { - "name": "Can add new_users", - "content_type": 9, - "codename": "add_new_users" - } - }, - { - "model": "auth.permission", - "pk": 34, - "fields": { - "name": "Can change new_users", - "content_type": 9, - "codename": "change_new_users" - } - }, - { - "model": "auth.permission", - "pk": 35, - "fields": { - "name": "Can delete new_users", - "content_type": 9, - "codename": "delete_new_users" - } - }, - { - "model": "auth.permission", - "pk": 36, - "fields": { - "name": "Can view new_users", - "content_type": 9, - "codename": "view_new_users" - } - }, - { - "model": "auth.permission", - "pk": 37, - "fields": { - "name": "Can add bco", - "content_type": 10, - "codename": "add_bco" - } - }, - { - "model": "auth.permission", - "pk": 38, - "fields": { - "name": "Can change bco", - "content_type": 10, - "codename": "change_bco" - } - }, - { - "model": "auth.permission", - "pk": 39, - "fields": { - "name": "Can delete bco", - "content_type": 10, - "codename": "delete_bco" - } - }, - { - "model": "auth.permission", - "pk": 40, - "fields": { - "name": "Can view bco", - "content_type": 10, - "codename": "view_bco" - } - }, - { - "model": "auth.permission", - "pk": 41, - "fields": { - "name": "Can add prefix_table", - "content_type": 11, - "codename": "add_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 42, - "fields": { - "name": "Can change prefix_table", - "content_type": 11, - "codename": "change_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 43, - "fields": { - "name": "Can delete prefix_table", - "content_type": 11, - "codename": "delete_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 44, - "fields": { - "name": "Can view prefix_table", - "content_type": 11, - "codename": "view_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 45, - "fields": { - "name": "Can add group info", - "content_type": 12, - "codename": "add_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 46, - 
"fields": { - "name": "Can change group info", - "content_type": 12, - "codename": "change_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 47, - "fields": { - "name": "Can delete group info", - "content_type": 12, - "codename": "delete_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 48, - "fields": { - "name": "Can view group info", - "content_type": 12, - "codename": "view_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 49, - "fields": { - "name": "Can add prefix", - "content_type": 13, - "codename": "add_prefix" - } - }, - { - "model": "auth.permission", - "pk": 50, - "fields": { - "name": "Can change prefix", - "content_type": 13, - "codename": "change_prefix" - } - }, - { - "model": "auth.permission", - "pk": 51, - "fields": { - "name": "Can delete prefix", - "content_type": 13, - "codename": "delete_prefix" - } - }, - { - "model": "auth.permission", - "pk": 52, - "fields": { - "name": "Can view prefix", - "content_type": 13, - "codename": "view_prefix" - } - }, - { - "model": "auth.permission", - "pk": 53, - "fields": { - "name": "Can add BCOs with prefix BCO", - "content_type": 10, - "codename": "add_BCO" - } - }, - { - "model": "auth.permission", - "pk": 54, - "fields": { - "name": "Can change BCOs with prefix BCO", - "content_type": 10, - "codename": "change_BCO" - } - }, - { - "model": "auth.permission", - "pk": 55, - "fields": { - "name": "Can delete BCOs with prefix BCO", - "content_type": 10, - "codename": "delete_BCO" - } - }, - { - "model": "auth.permission", - "pk": 56, - "fields": { - "name": "Can view BCOs with prefix BCO", - "content_type": 10, - "codename": "view_BCO" - } - }, - { - "model": "auth.permission", - "pk": 57, - "fields": { - "name": "Can draft BCOs with prefix BCO", - "content_type": 10, - "codename": "draft_BCO" - } - }, - { - "model": "auth.permission", - "pk": 58, - "fields": { - "name": "Can publish BCOs with prefix BCO", - "content_type": 10, - "codename": "publish_BCO" - } - }, - { - 
"model": "auth.permission", - "pk": 59, - "fields": { - "name": "Can add group object permission", - "content_type": 14, - "codename": "add_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 60, - "fields": { - "name": "Can change group object permission", - "content_type": 14, - "codename": "change_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 61, - "fields": { - "name": "Can delete group object permission", - "content_type": 14, - "codename": "delete_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 62, - "fields": { - "name": "Can view group object permission", - "content_type": 14, - "codename": "view_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 63, - "fields": { - "name": "Can add user object permission", - "content_type": 15, - "codename": "add_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 64, - "fields": { - "name": "Can change user object permission", - "content_type": 15, - "codename": "change_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 65, - "fields": { - "name": "Can delete user object permission", - "content_type": 15, - "codename": "delete_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 66, - "fields": { - "name": "Can view user object permission", - "content_type": 15, - "codename": "view_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 67, - "fields": { - "name": "Can add BCOs with prefix TEST", - "content_type": 10, - "codename": "add_TEST" - } - }, - { - "model": "auth.permission", - "pk": 68, - "fields": { - "name": "Can change BCOs with prefix TEST", - "content_type": 10, - "codename": "change_TEST" - } - }, - { - "model": "auth.permission", - "pk": 69, - "fields": { - "name": "Can delete BCOs with prefix TEST", - "content_type": 10, - "codename": "delete_TEST" - } - }, - { - "model": "auth.permission", - "pk": 70, - "fields": { - "name": 
"Can view BCOs with prefix TEST", - "content_type": 10, - "codename": "view_TEST" - } - }, - { - "model": "auth.permission", - "pk": 71, - "fields": { - "name": "Can draft BCOs with prefix TEST", - "content_type": 10, - "codename": "draft_TEST" - } - }, - { - "model": "auth.permission", - "pk": 72, - "fields": { - "name": "Can publish BCOs with prefix TEST", - "content_type": 10, - "codename": "publish_TEST" - } - }, - { - "model": "auth.permission", - "pk": 73, - "fields": { - "name": "Can add BCOs with prefix OTHER", - "content_type": 10, - "codename": "add_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 74, - "fields": { - "name": "Can change BCOs with prefix OTHER", - "content_type": 10, - "codename": "change_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 75, - "fields": { - "name": "Can delete BCOs with prefix OTHER", - "content_type": 10, - "codename": "delete_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 76, - "fields": { - "name": "Can view BCOs with prefix OTHER", - "content_type": 10, - "codename": "view_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 77, - "fields": { - "name": "Can draft BCOs with prefix OTHER", - "content_type": 10, - "codename": "draft_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 78, - "fields": { - "name": "Can publish BCOs with prefix OTHER", - "content_type": 10, - "codename": "publish_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 79, - "fields": { - "name": "Can add blacklisted token", - "content_type": 16, - "codename": "add_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 80, - "fields": { - "name": "Can change blacklisted token", - "content_type": 16, - "codename": "change_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 81, - "fields": { - "name": "Can delete blacklisted token", - "content_type": 16, - "codename": "delete_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 82, - "fields": { - "name": 
"Can view blacklisted token", - "content_type": 16, - "codename": "view_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 83, - "fields": { - "name": "Can add authentication", - "content_type": 17, - "codename": "add_authentication" - } - }, - { - "model": "auth.permission", - "pk": 84, - "fields": { - "name": "Can change authentication", - "content_type": 17, - "codename": "change_authentication" - } - }, - { - "model": "auth.permission", - "pk": 85, - "fields": { - "name": "Can delete authentication", - "content_type": 17, - "codename": "delete_authentication" - } - }, - { - "model": "auth.permission", - "pk": 86, - "fields": { - "name": "Can view authentication", - "content_type": 17, - "codename": "view_authentication" - } - }, - { - "model": "auth.group", - "pk": 1, - "fields": { - "name": "bco_drafter", - "permissions": [ - 53, - 54, - 55, - 57, - 56 - ] - } - }, - { - "model": "auth.group", - "pk": 2, - "fields": { - "name": "bco_publisher", - "permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - "model": "auth.group", - "pk": 3, - "fields": { - "name": "anon", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 4, - "fields": { - "name": "wheel", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 5, - "fields": { - "name": "group_admins", - "permissions": [ - 9, - 10, - 11, - 12 - ] - } - }, - { - "model": "auth.group", - "pk": 6, - "fields": { - "name": "prefix_admins", - "permissions": [ - 49, - 50, - 51, - 52 - ] - } - }, - { - "model": "auth.group", - "pk": 7, - "fields": { - "name": "AnonymousUser", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 8, - "fields": { - "name": "bco_api_user", - "permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 9, - "fields": { - "name": "test50", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 10, - "fields": { - "name": "hivelab37", - 
"permissions": [] - } - }, - { - "model": "auth.group", - "pk": 11, - "fields": { - "name": "jdoe58", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 12, - "fields": { - "name": "test_drafter", - "permissions": [ - 67, - 68, - 69, - 71, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 13, - "fields": { - "name": "test_publisher", - "permissions": [ - 67, - 68, - 69, - 71, - 72, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 14, - "fields": { - "name": "other_drafter", - "permissions": [ - 73, - 74, - 75, - 77, - 76 - ] - } - }, - { - "model": "auth.group", - "pk": 15, - "fields": { - "name": "other_publisher", - "permissions": [ - 73, - 74, - 75, - 77, - 78, - 76 - ] - } - }, - { - "model": "auth.user", - "pk": 1, - "fields": { - "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", - "last_login": null, - "is_superuser": false, - "username": "bco_drafter", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.496Z", - "groups": [ - 1 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 2, - "fields": { - "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", - "last_login": null, - "is_superuser": false, - "username": "bco_publisher", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.506Z", - "groups": [ - 1, - 2 - ], - "user_permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", - "last_login": null, - "is_superuser": false, - "username": "anon", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.517Z", - "groups": [ - 3 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 4, - "fields": { - "password": 
"pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", - "last_login": null, - "is_superuser": true, - "username": "wheel", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.528Z", - "groups": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", - "last_login": null, - "is_superuser": false, - "username": "AnonymousUser", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.844Z", - "groups": [ - 1, - 2, - 7 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2022-06-28T23:05:37.314Z", - "is_superuser": true, - "username": "bco_api_user", - "first_name": "BioCompute", - "last_name": "Objects", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53.381Z", - "groups": [ - 1, - 2, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "user_permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.user", - "pk": 7, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - "is_superuser": false, - "username": "test50", - "first_name": "", - "last_name": "", - "email": "test@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:50:39.093Z", - "groups": [ - 1, - 2, - 9, - 12 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 8, - "fields": { - "password": 
"pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab37", - "first_name": "HIVE", - "last_name": "Lab", - "email": "hivelab@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [ - 1, - 2, - 10 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 9, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe58", - "first_name": "John", - "last_name": "Doe", - "email": "jdoe@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - "groups": [ - 1, - 2, - 11 - ], - "user_permissions": [] - } - }, - - { - "model": "sessions.session", - "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", - "expire_date": "2022-07-12T15:13:16.934Z" - } - }, - { - "model": "sessions.session", - "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", - "expire_date": "2022-05-24T20:46:02.514Z" - } - }, - { - "model": "sessions.session", - "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", - 
"expire_date": "2022-05-24T20:54:17.321Z" - } - }, - { - "model": "sessions.session", - "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", - "expire_date": "2022-07-12T23:05:37.317Z" - } - }, - { - "model": "api.bco", - "pk": 1, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000000/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.0", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:10:12.804Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. 
" - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", 
- "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/BCO_000000/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:10:17.996Z" - } - }, - { - "model": "api.bco", - "pk": 2, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - 
"etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL 
example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - 
"output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": 
"FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/BCO_000001/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:13:13.841Z" - } - }, - { - "model": "api.bco", - "pk": 3, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000002/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0", - "created": "2018-11-29T11:29:08-0500", - "modified": "2022-06-28T23:19:38.283Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": 
[ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ 
- { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": 
"2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/BCO_000002/DRAFT", - "owner_group": 
"bco_drafter", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:19:53.937Z" - } - }, - { - "model": "api.bco", - "pk": 4, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000003/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:47.218Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. 
While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). 
Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - 
"input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/BCO_000003/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:21:56.878Z" - } - }, - { - "model": "api.bco", - "pk": 5, - "fields": { - "contents": { - "object_id": "http://localhost:8000/TEST_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-06-28T23:44:49.394Z", - 
"contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. 
default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": 
"https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": 
"molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/TEST_000001/DRAFT", - "owner_group": "test_drafter", - "owner_user": "test50", - "prefix": "TEST", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:44:58.149Z" - } - }, - { - "model": "api.bco", - "pk": 6, - "fields": { - "contents": { - "object_id": "http://localhost:8000/OTHER_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - 
"contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
- ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], 
- "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": "", - "object_id": "http://localhost:8000/OTHER_000001/DRAFT", - "owner_group": "other_drafter", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:41:49.698Z" - } - }, - { - "model": "api.bco", - "pk": 7, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000000/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.3", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:06:43.263Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", - "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://localhost:8000/BCO_000000/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:10:18.007Z" - } - }, - { - "model": "api.bco", - "pk": 8, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000001/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": 
"HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": 
"HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": null, - "object_id": "http://localhost:8000/BCO_000001/1.0", - "owner_group": "test50", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:13:13.859Z" - } - }, - { - "model": "api.bco", - "pk": 9, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000002/1.0", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2000-09-26T14:43:45-0400" - }, - "created": "2018-11-29T11:29:08-0500", - "modified": "2018-11-30T11:29:08-0500", - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "xref": [ - { - "namespace": "uberon", - "name": "Uber Anatomy Ontology", - "ids": [ - "0001988" - ], - "access_time": "2016-11-30T06:46-0500" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2016-11-30T06:46-0500" - } - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": 
"2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - 
"step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": 
"<0.00005", - "false_discovery": "0.005" - } - } - }, - "object_class": null, - "object_id": "http://localhost:8000/BCO_000002/1.0", - "owner_group": "hivelab37", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:19:53.938Z" - } - }, - { - "model": "api.bco", - "pk": 10, - "fields": { - "contents": { - "object_id": "http://localhost:8000/BCO_000003/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:13.091Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
- ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": 
{ - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://localhost:8000/BCO_000003/1.0", - "owner_group": "jdoe58", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:21:56.879Z" - } - }, - { - "model": "api.bco", - "pk": 11, - "fields": { - "contents": { - "object_id": "http://localhost:8000/OTHER_000001/1.0", - "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": 
"George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
- }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": null, - "object_id": "http://localhost:8000/OTHER_000001/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:41:49.719Z" - } - }, - { - "model": "api.bco", - "pk": 12, - "fields": { - "contents": { - "object_id": "http://localhost:8000/TEST_000001/1.2", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": 
"Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
- ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google 
Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://localhost:8000/TEST_000001/1.2", - "owner_group": "bco_api_user", - "owner_user": "test50", - "prefix": "TEST", - "schema": 
"IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:44:58.161Z" - } - }, - { - "model": "api.groupinfo", - "pk": 1, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Group administrators", - "expiration": null, - "group": "group_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 2, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Prefix administrators", - "expiration": null, - "group": "prefix_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 3, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 4, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 5, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 6, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.prefix_table", - "pk": 1, - "fields": { - "n_objects": 8, - "prefix": "BCO" - } - }, - { - "model": "api.prefix_table", - "pk": 2, - "fields": { - "n_objects": 3, - "prefix": "TEST" - } - }, - { - "model": "api.prefix_table", - "pk": 3, - "fields": { - "n_objects": 3, - "prefix": "OTHER" - } - }, - { - "model": "api.prefix", - "pk": 1, - "fields": { - 
"certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T20:35:14.712Z", - "created_by": "bco_publisher", - "description": null, - "expires": null, - "owner_group": "bco_publisher", - "owner_user": "bco_publisher", - "prefix": "BCO" - } - }, - { - "model": "api.prefix", - "pk": 2, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:32.633Z", - "created_by": "bco_api_user", - "description": "Just a test prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "TEST" - } - }, - { - "model": "api.prefix", - "pk": 3, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:35.104Z", - "created_by": "bco_api_user", - "description": "Just an other prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER" - } - } -] \ No newline at end of file diff --git a/bcodb/fixtures/test_portal.json b/bcodb/fixtures/test_portal.json deleted file mode 100644 index 486e8b66..00000000 --- a/bcodb/fixtures/test_portal.json +++ /dev/null @@ -1,6544 +0,0 @@ -[ - - { - "model": "authtoken.token", - "pk": "2f2a599026581c158a07f968c56292c77f4be875", - "fields": { - "user": 2, - "created": "2023-10-11T14:53:20.557Z" - } - }, - { - "model": "authtoken.token", - "pk": "258220666ca9e667e560b42a1eb4d4080fc1c744", - "fields": { - "user": 5, - "created": "2023-10-11T14:53:20.560Z" - } - }, - { - "model": "authtoken.token", - "pk": "44d2669cfca52239ef4d97e1a7cfdd69d4eeb95e", - "fields": { - "user": 4, - "created": "2023-10-11T14:53:20.559Z" - } - }, - { - "model": "authtoken.token", - "pk": "627626823549f787c3ec763ff687169206626149", - "fields": { - "user": 3, - "created": "2023-10-11T14:53:20.558Z" - } - }, - { - "model": "authtoken.token", - "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", - "fields": { - "user": 8, - "created": "2023-10-11T14:53:20.564Z" - } - }, - { - "model": 
"authtoken.token", - "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", - "fields": { - "user": 7, - "created": "2023-10-11T14:53:20.563Z" - } - }, - { - "model": "authtoken.token", - "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", - "fields": { - "user": 6, - "created": "2023-10-11T14:53:20.561Z" - } - }, - { - "model": "authtoken.token", - "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", - "fields": { - "user": 9, - "created": "2023-10-11T14:53:20.565Z" - } - }, - { - "model": "admin.logentry", - "pk": 1, - "fields": { - "action_time": "2022-06-28T23:06:35.693Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 2, - "fields": { - "action_time": "2022-06-28T23:08:10.571Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 3, - "fields": { - "action_time": "2022-06-28T23:09:47.922Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 4, - "fields": { - "action_time": "2022-06-28T23:12:37.828Z", - "user": 6, - "content_type": 10, - "object_id": "2", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 5, - "fields": { - "action_time": "2022-06-28T23:14:01.431Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": 
"https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 6, - "fields": { - "action_time": "2022-06-28T23:16:50.236Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 7, - "fields": { - "action_time": "2022-06-28T23:19:25.710Z", - "user": 6, - "content_type": 10, - "object_id": "3", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 8, - "fields": { - "action_time": "2022-06-28T23:21:05.713Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 9, - "fields": { - "action_time": "2022-06-28T23:21:43.425Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 10, - "fields": { - "action_time": "2022-06-28T23:23:00.080Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 11, - "fields": { - "action_time": "2022-06-28T23:23:13.087Z", - "user": 6, - "content_type": 10, - "object_id": "5", - 
"object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 12, - "fields": { - "action_time": "2022-06-28T23:41:21.155Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 13, - "fields": { - "action_time": "2022-06-28T23:43:57.562Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 14, - "fields": { - "action_time": "2022-06-28T23:44:43.690Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "auth.permission", - "pk": 1, - "fields": { - "name": "Can add log entry", - "content_type": 1, - "codename": "add_logentry" - } - }, - { - "model": "auth.permission", - "pk": 2, - "fields": { - "name": "Can change log entry", - "content_type": 1, - "codename": "change_logentry" - } - }, - { - "model": "auth.permission", - "pk": 3, - "fields": { - "name": "Can delete log entry", - "content_type": 1, - "codename": "delete_logentry" - } - }, - { - "model": "auth.permission", - "pk": 4, - "fields": { - "name": "Can view log entry", - "content_type": 1, - "codename": "view_logentry" - } - }, - { - "model": "auth.permission", - "pk": 5, - "fields": { - "name": "Can add permission", - "content_type": 2, - "codename": "add_permission" - } - }, - { - "model": "auth.permission", - "pk": 6, - 
"fields": { - "name": "Can change permission", - "content_type": 2, - "codename": "change_permission" - } - }, - { - "model": "auth.permission", - "pk": 7, - "fields": { - "name": "Can delete permission", - "content_type": 2, - "codename": "delete_permission" - } - }, - { - "model": "auth.permission", - "pk": 8, - "fields": { - "name": "Can view permission", - "content_type": 2, - "codename": "view_permission" - } - }, - { - "model": "auth.permission", - "pk": 9, - "fields": { - "name": "Can add group", - "content_type": 3, - "codename": "add_group" - } - }, - { - "model": "auth.permission", - "pk": 10, - "fields": { - "name": "Can change group", - "content_type": 3, - "codename": "change_group" - } - }, - { - "model": "auth.permission", - "pk": 11, - "fields": { - "name": "Can delete group", - "content_type": 3, - "codename": "delete_group" - } - }, - { - "model": "auth.permission", - "pk": 12, - "fields": { - "name": "Can view group", - "content_type": 3, - "codename": "view_group" - } - }, - { - "model": "auth.permission", - "pk": 13, - "fields": { - "name": "Can add user", - "content_type": 4, - "codename": "add_user" - } - }, - { - "model": "auth.permission", - "pk": 14, - "fields": { - "name": "Can change user", - "content_type": 4, - "codename": "change_user" - } - }, - { - "model": "auth.permission", - "pk": 15, - "fields": { - "name": "Can delete user", - "content_type": 4, - "codename": "delete_user" - } - }, - { - "model": "auth.permission", - "pk": 16, - "fields": { - "name": "Can view user", - "content_type": 4, - "codename": "view_user" - } - }, - { - "model": "auth.permission", - "pk": 17, - "fields": { - "name": "Can add content type", - "content_type": 5, - "codename": "add_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 18, - "fields": { - "name": "Can change content type", - "content_type": 5, - "codename": "change_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 19, - "fields": { - "name": "Can delete content 
type", - "content_type": 5, - "codename": "delete_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 20, - "fields": { - "name": "Can view content type", - "content_type": 5, - "codename": "view_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 21, - "fields": { - "name": "Can add session", - "content_type": 6, - "codename": "add_session" - } - }, - { - "model": "auth.permission", - "pk": 22, - "fields": { - "name": "Can change session", - "content_type": 6, - "codename": "change_session" - } - }, - { - "model": "auth.permission", - "pk": 23, - "fields": { - "name": "Can delete session", - "content_type": 6, - "codename": "delete_session" - } - }, - { - "model": "auth.permission", - "pk": 24, - "fields": { - "name": "Can view session", - "content_type": 6, - "codename": "view_session" - } - }, - { - "model": "auth.permission", - "pk": 25, - "fields": { - "name": "Can add Token", - "content_type": 7, - "codename": "add_token" - } - }, - { - "model": "auth.permission", - "pk": 26, - "fields": { - "name": "Can change Token", - "content_type": 7, - "codename": "change_token" - } - }, - { - "model": "auth.permission", - "pk": 27, - "fields": { - "name": "Can delete Token", - "content_type": 7, - "codename": "delete_token" - } - }, - { - "model": "auth.permission", - "pk": 28, - "fields": { - "name": "Can view Token", - "content_type": 7, - "codename": "view_token" - } - }, - { - "model": "auth.permission", - "pk": 29, - "fields": { - "name": "Can add token", - "content_type": 8, - "codename": "add_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 30, - "fields": { - "name": "Can change token", - "content_type": 8, - "codename": "change_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 31, - "fields": { - "name": "Can delete token", - "content_type": 8, - "codename": "delete_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 32, - "fields": { - "name": "Can view token", - "content_type": 8, - "codename": 
"view_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 33, - "fields": { - "name": "Can add new_users", - "content_type": 9, - "codename": "add_new_users" - } - }, - { - "model": "auth.permission", - "pk": 34, - "fields": { - "name": "Can change new_users", - "content_type": 9, - "codename": "change_new_users" - } - }, - { - "model": "auth.permission", - "pk": 35, - "fields": { - "name": "Can delete new_users", - "content_type": 9, - "codename": "delete_new_users" - } - }, - { - "model": "auth.permission", - "pk": 36, - "fields": { - "name": "Can view new_users", - "content_type": 9, - "codename": "view_new_users" - } - }, - { - "model": "auth.permission", - "pk": 37, - "fields": { - "name": "Can add bco", - "content_type": 10, - "codename": "add_bco" - } - }, - { - "model": "auth.permission", - "pk": 38, - "fields": { - "name": "Can change bco", - "content_type": 10, - "codename": "change_bco" - } - }, - { - "model": "auth.permission", - "pk": 39, - "fields": { - "name": "Can delete bco", - "content_type": 10, - "codename": "delete_bco" - } - }, - { - "model": "auth.permission", - "pk": 40, - "fields": { - "name": "Can view bco", - "content_type": 10, - "codename": "view_bco" - } - }, - { - "model": "auth.permission", - "pk": 41, - "fields": { - "name": "Can add prefix_table", - "content_type": 11, - "codename": "add_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 42, - "fields": { - "name": "Can change prefix_table", - "content_type": 11, - "codename": "change_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 43, - "fields": { - "name": "Can delete prefix_table", - "content_type": 11, - "codename": "delete_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 44, - "fields": { - "name": "Can view prefix_table", - "content_type": 11, - "codename": "view_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 45, - "fields": { - "name": "Can add group info", - "content_type": 12, - "codename": 
"add_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 46, - "fields": { - "name": "Can change group info", - "content_type": 12, - "codename": "change_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 47, - "fields": { - "name": "Can delete group info", - "content_type": 12, - "codename": "delete_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 48, - "fields": { - "name": "Can view group info", - "content_type": 12, - "codename": "view_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 49, - "fields": { - "name": "Can add prefix", - "content_type": 13, - "codename": "add_prefix" - } - }, - { - "model": "auth.permission", - "pk": 50, - "fields": { - "name": "Can change prefix", - "content_type": 13, - "codename": "change_prefix" - } - }, - { - "model": "auth.permission", - "pk": 51, - "fields": { - "name": "Can delete prefix", - "content_type": 13, - "codename": "delete_prefix" - } - }, - { - "model": "auth.permission", - "pk": 52, - "fields": { - "name": "Can view prefix", - "content_type": 13, - "codename": "view_prefix" - } - }, - { - "model": "auth.permission", - "pk": 53, - "fields": { - "name": "Can add BCOs with prefix BCO", - "content_type": 10, - "codename": "add_BCO" - } - }, - { - "model": "auth.permission", - "pk": 54, - "fields": { - "name": "Can change BCOs with prefix BCO", - "content_type": 10, - "codename": "change_BCO" - } - }, - { - "model": "auth.permission", - "pk": 55, - "fields": { - "name": "Can delete BCOs with prefix BCO", - "content_type": 10, - "codename": "delete_BCO" - } - }, - { - "model": "auth.permission", - "pk": 56, - "fields": { - "name": "Can view BCOs with prefix BCO", - "content_type": 10, - "codename": "view_BCO" - } - }, - { - "model": "auth.permission", - "pk": 57, - "fields": { - "name": "Can draft BCOs with prefix BCO", - "content_type": 10, - "codename": "draft_BCO" - } - }, - { - "model": "auth.permission", - "pk": 58, - "fields": { - "name": "Can publish BCOs with prefix 
BCO", - "content_type": 10, - "codename": "publish_BCO" - } - }, - { - "model": "auth.permission", - "pk": 59, - "fields": { - "name": "Can add group object permission", - "content_type": 14, - "codename": "add_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 60, - "fields": { - "name": "Can change group object permission", - "content_type": 14, - "codename": "change_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 61, - "fields": { - "name": "Can delete group object permission", - "content_type": 14, - "codename": "delete_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 62, - "fields": { - "name": "Can view group object permission", - "content_type": 14, - "codename": "view_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 63, - "fields": { - "name": "Can add user object permission", - "content_type": 15, - "codename": "add_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 64, - "fields": { - "name": "Can change user object permission", - "content_type": 15, - "codename": "change_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 65, - "fields": { - "name": "Can delete user object permission", - "content_type": 15, - "codename": "delete_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 66, - "fields": { - "name": "Can view user object permission", - "content_type": 15, - "codename": "view_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 67, - "fields": { - "name": "Can add BCOs with prefix TEST", - "content_type": 10, - "codename": "add_TEST" - } - }, - { - "model": "auth.permission", - "pk": 68, - "fields": { - "name": "Can change BCOs with prefix TEST", - "content_type": 10, - "codename": "change_TEST" - } - }, - { - "model": "auth.permission", - "pk": 69, - "fields": { - "name": "Can delete BCOs with prefix TEST", - "content_type": 10, - "codename": "delete_TEST" - } - }, - 
{ - "model": "auth.permission", - "pk": 70, - "fields": { - "name": "Can view BCOs with prefix TEST", - "content_type": 10, - "codename": "view_TEST" - } - }, - { - "model": "auth.permission", - "pk": 71, - "fields": { - "name": "Can draft BCOs with prefix TEST", - "content_type": 10, - "codename": "draft_TEST" - } - }, - { - "model": "auth.permission", - "pk": 72, - "fields": { - "name": "Can publish BCOs with prefix TEST", - "content_type": 10, - "codename": "publish_TEST" - } - }, - { - "model": "auth.permission", - "pk": 73, - "fields": { - "name": "Can add BCOs with prefix OTHER", - "content_type": 10, - "codename": "add_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 74, - "fields": { - "name": "Can change BCOs with prefix OTHER", - "content_type": 10, - "codename": "change_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 75, - "fields": { - "name": "Can delete BCOs with prefix OTHER", - "content_type": 10, - "codename": "delete_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 76, - "fields": { - "name": "Can view BCOs with prefix OTHER", - "content_type": 10, - "codename": "view_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 77, - "fields": { - "name": "Can draft BCOs with prefix OTHER", - "content_type": 10, - "codename": "draft_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 78, - "fields": { - "name": "Can publish BCOs with prefix OTHER", - "content_type": 10, - "codename": "publish_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 79, - "fields": { - "name": "Can add blacklisted token", - "content_type": 16, - "codename": "add_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 80, - "fields": { - "name": "Can change blacklisted token", - "content_type": 16, - "codename": "change_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 81, - "fields": { - "name": "Can delete blacklisted token", - "content_type": 16, - "codename": "delete_blacklistedtoken" - } - }, - { - 
"model": "auth.permission", - "pk": 82, - "fields": { - "name": "Can view blacklisted token", - "content_type": 16, - "codename": "view_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 83, - "fields": { - "name": "Can add authentication", - "content_type": 17, - "codename": "add_authentication" - } - }, - { - "model": "auth.permission", - "pk": 84, - "fields": { - "name": "Can change authentication", - "content_type": 17, - "codename": "change_authentication" - } - }, - { - "model": "auth.permission", - "pk": 85, - "fields": { - "name": "Can delete authentication", - "content_type": 17, - "codename": "delete_authentication" - } - }, - { - "model": "auth.permission", - "pk": 86, - "fields": { - "name": "Can view authentication", - "content_type": 17, - "codename": "view_authentication" - } - }, - { - "model": "auth.group", - "pk": 1, - "fields": { - "name": "bco_drafter", - "permissions": [ - 53, - 54, - 55, - 57, - 56 - ] - } - }, - { - "model": "auth.group", - "pk": 2, - "fields": { - "name": "bco_publisher", - "permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - "model": "auth.group", - "pk": 3, - "fields": { - "name": "anon", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 4, - "fields": { - "name": "wheel", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 5, - "fields": { - "name": "group_admins", - "permissions": [ - 9, - 10, - 11, - 12 - ] - } - }, - { - "model": "auth.group", - "pk": 6, - "fields": { - "name": "prefix_admins", - "permissions": [ - 49, - 50, - 51, - 52 - ] - } - }, - { - "model": "auth.group", - "pk": 7, - "fields": { - "name": "AnonymousUser", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 8, - "fields": { - "name": "bco_api_user", - "permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 9, - "fields": { - "name": "test50", - "permissions": [] - } - }, - { - "model": 
"auth.group", - "pk": 10, - "fields": { - "name": "hivelab37", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 11, - "fields": { - "name": "jdoe58", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 12, - "fields": { - "name": "test_drafter", - "permissions": [ - 67, - 68, - 69, - 71, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 13, - "fields": { - "name": "test_publisher", - "permissions": [ - 67, - 68, - 69, - 71, - 72, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 14, - "fields": { - "name": "other_drafter", - "permissions": [ - 73, - 74, - 75, - 77, - 76 - ] - } - }, - { - "model": "auth.group", - "pk": 15, - "fields": { - "name": "other_publisher", - "permissions": [ - 73, - 74, - 75, - 77, - 78, - 76 - ] - } - }, - { - "model": "auth.user", - "pk": 1, - "fields": { - "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", - "last_login": null, - "is_superuser": false, - "username": "bco_drafter", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.496Z", - "groups": [ - 1 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 2, - "fields": { - "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", - "last_login": null, - "is_superuser": false, - "username": "bco_publisher", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.506Z", - "groups": [ - 1, - 2 - ], - "user_permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", - "last_login": null, - "is_superuser": false, - "username": "anon", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.517Z", - "groups": [ - 3 - ], - "user_permissions": [] - } - }, - { - "model": 
"auth.user", - "pk": 4, - "fields": { - "password": "pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", - "last_login": null, - "is_superuser": true, - "username": "wheel", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.528Z", - "groups": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", - "last_login": null, - "is_superuser": false, - "username": "AnonymousUser", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.844Z", - "groups": [ - 1, - 2, - 7 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2022-06-28T23:05:37.314Z", - "is_superuser": true, - "username": "bco_api_user", - "first_name": "BioCompute", - "last_name": "Objects", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53.381Z", - "groups": [ - 1, - 2, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "user_permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.user", - "pk": 7, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - "is_superuser": false, - "username": "test50", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:50:39.093Z", - "groups": [ - 1, - 2, - 9, - 12 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 8, - "fields": { - 
"password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab37", - "first_name": "HIVE", - "last_name": "Lab", - "email": "hivelab@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [ - 1, - 2, - 10 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 9, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe58", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - "groups": [ - 1, - 2, - 11 - ], - "user_permissions": [] - } - }, - - { - "model": "sessions.session", - "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", - "expire_date": "2022-07-12T15:13:16.934Z" - } - }, - { - "model": "sessions.session", - "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", - "expire_date": "2022-05-24T20:46:02.514Z" - } - }, - { - "model": "sessions.session", - "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", - 
"expire_date": "2022-05-24T20:54:17.321Z" - } - }, - { - "model": "sessions.session", - "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", - "expire_date": "2022-07-12T23:05:37.317Z" - } - }, - { - "model": "api.bco", - "pk": 1, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.0", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:10:12.804Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. 
" - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", 
- "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:10:17.996Z" - } - }, - { - "model": "api.bco", - "pk": 2, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure 
involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - "input_list": [ - { - "uri": 
"http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": 
"2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": 
"25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:13:13.841Z" - } - }, - { - "model": "api.bco", - "pk": 3, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0", - "created": "2018-11-29T11:29:08-0500", - "modified": "2022-06-28T23:19:38.283Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": 
"https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - 
"access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": 
"2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": 
"https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:19:53.937Z" - } - }, - { - "model": "api.bco", - "pk": 4, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:47.218Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
- ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": 
{ - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:21:56.878Z" - } - }, - { - "model": "api.bco", - "pk": 5, - "fields": { - "contents": { - "object_id": 
"https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-06-28T23:44:49.394Z", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. 
The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": 
"SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", - "owner_group": "test_drafter", - "owner_user": "test50", - "prefix": "TEST", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:44:58.149Z" - } - }, - { - "model": "api.bco", - "pk": 6, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", - "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": 
"George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
- }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": "", - "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/DRAFT", - "owner_group": "other_drafter", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:41:49.698Z" - } - }, - { - "model": "api.bco", - "pk": 7, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.3", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:06:43.263Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto 
Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", - "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:10:18.007Z" - } - }, - { - "model": "api.bco", - "pk": 8, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": 
"HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": 
"HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/1.0", - "owner_group": "test50", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:13:13.859Z" - } - }, - { - "model": "api.bco", - "pk": 9, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/1.0", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2000-09-26T14:43:45-0400" - }, - "created": "2018-11-29T11:29:08-0500", - "modified": "2018-11-30T11:29:08-0500", - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - 
"license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "xref": [ - { - "namespace": "uberon", - "name": "Uber Anatomy Ontology", - "ids": [ - "0001988" - ], - "access_time": "2016-11-30T06:46-0500" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2016-11-30T06:46-0500" - } - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - 
"access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - 
"value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - 
"false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - } - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000002/1.0", - "owner_group": "hivelab37", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:19:53.938Z" - } - }, - { - "model": "api.bco", - "pk": 10, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:13.091Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
- ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": 
{ - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000003/1.0", - "owner_group": "jdoe58", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:21:56.879Z" - } - }, - { - "model": "api.bco", - "pk": 11, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/1.0", 
- "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - 
"affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
- }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/OTHER_000001/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:41:49.719Z" - } - }, - { - "model": "api.bco", - "pk": 12, - "fields": { - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.2", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - 
"createdBy", - "curatedBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
- ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google 
Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.2", - "owner_group": "bco_api_user", - "owner_user": "test50", - "prefix": 
"TEST", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:44:58.161Z" - } - }, - { - "model": "api.groupinfo", - "pk": 1, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Group administrators", - "expiration": null, - "group": "group_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 2, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Prefix administrators", - "expiration": null, - "group": "prefix_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 3, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 4, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 5, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 6, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.prefix_table", - "pk": 1, - "fields": { - "n_objects": 8, - "prefix": "BCO" - } - }, - { - "model": "api.prefix_table", - "pk": 2, - "fields": { - "n_objects": 3, - "prefix": "TEST" - } - }, - { - "model": "api.prefix_table", - "pk": 3, - "fields": { - "n_objects": 3, - "prefix": "OTHER" - } - }, - { - "model": "api.prefix", - "pk": 1, - "fields": 
{ - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T20:35:14.712Z", - "created_by": "bco_publisher", - "description": null, - "expires": null, - "owner_group": "bco_publisher", - "owner_user": "bco_publisher", - "prefix": "BCO" - } - }, - { - "model": "api.prefix", - "pk": 2, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:32.633Z", - "created_by": "bco_api_user", - "description": "Just a test prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "TEST" - } - }, - { - "model": "api.prefix", - "pk": 3, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:35.104Z", - "created_by": "bco_api_user", - "description": "Just an other prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER" - } - } -] \ No newline at end of file diff --git a/bcodb/settings.py b/bcodb/settings.py deleted file mode 100644 index 366f04a0..00000000 --- a/bcodb/settings.py +++ /dev/null @@ -1,288 +0,0 @@ -""" -Django settings for bco_editor project. -Generated by 'django-admin startproject' using Django 3.0.9. -For more information on this file, see -https://docs.djangoproject.com/en/3.0/topics/settings/ -For the full list of settings and their values, see -https://docs.djangoproject.com/en/3.0/ref/settings/ -""" - -import os -from datetime import timedelta - -# For importing configuration files -import configparser - -# For importing schema -from api.scripts.utilities import SettingsUtils - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -# --- SECURITY SETTINGS --- # -# Load the server config file. 
-server_config = configparser.ConfigParser() -server_config.read(BASE_DIR + "/server.conf") - -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ - -# Is this a production server? -PRODUCTION = server_config["PRODUCTION"]["production"] - -# Set the anonymous user's key. -ANON_KEY = server_config["KEYS"]["anon"] - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = "$vz@#@^q(od&$rf&*6^z!m5nh6qw2*cq*j6fha#^h9(r7$xqy4" - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = PRODUCTION - -# The human-readable hostname. -HUMAN_READABLE_HOSTNAME = server_config["HRHOSTNAME"]["hrnames"] - -if server_config["GROUP_PREFIX"]["allow_all_creation"] == "True": - GROUP = True - PREFIX = True -elif server_config["GROUP_PREFIX"]["allow_group_creation"] == "True": - GROUP = True -elif server_config["GROUP_PREFIX"]["allow_prefix_creation"] == "True": - PREFIX = True - -# The publicly accessible hostname. -if server_config["PRODUCTION"]["production"] == "True": - PUBLIC_HOSTNAME = server_config["PUBLICHOSTNAME"]["prod_name"] -elif server_config["PRODUCTION"]["production"] == "False": - PUBLIC_HOSTNAME = server_config["PUBLICHOSTNAME"]["name"] - -# Source: https://dzone.com/articles/how-to-fix-django-cors-error - -# Check for open (public) access to the API. -if server_config["REQUESTS_FROM"]["public"].strip() == "false": - - # Process the requester groups. - - # configparser automatically strips white space off the - # ends of arguments. - requesters = [ - server_config["REQUESTS_FROM"][i].strip() - for i in server_config["REQUESTS_FROM"] - ] - requesters.remove("false") - requesters = [i.split(",") for i in requesters] - - # Flatten the list. 
- # Source: https://stackabuse.com/python-how-to-flatten-list-of-lists/ - flattened = [item.strip() for sublist in requesters for item in sublist] - - if server_config["PRODUCTION"]["production"] == "True": - ALLOWED_HOSTS = [ - i.strip() for i in server_config["HOSTNAMES"]["prod_names"].split(",") - ] - elif server_config["PRODUCTION"]["production"] == "False": - ALLOWED_HOSTS = [ - i.strip() for i in server_config["HOSTNAMES"]["names"].split(",") - ] - - CORS_ORIGIN_ALLOW_ALL = False - CORS_ORIGIN_WHITELIST = tuple(flattened) - -elif server_config["REQUESTS_FROM"]["public"].strip() == "true": - if server_config["PRODUCTION"]["production"] == "True": - ALLOWED_HOSTS = [server_config["HOSTNAMES"]["prod_names"].split(",")[0], "*"] - CORS_ORIGIN_ALLOW_ALL = True - elif server_config["PRODUCTION"]["production"] == "False": - ALLOWED_HOSTS = [server_config["HOSTNAMES"]["names"].split(",")[0], "*"] - CORS_ORIGIN_ALLOW_ALL = True - -# Use the REST framework -REST_FRAMEWORK = { - "DEFAULT_AUTHENTICATION_CLASSES": [ - 'authentication.services.CustomJSONWebTokenAuthentication', - "rest_framework.authentication.TokenAuthentication", - 'rest_framework.authentication.SessionAuthentication', - 'rest_framework.authentication.BasicAuthentication', - ], - "DEFAULT_PERMISSION_CLASSES": ["rest_framework.permissions.IsAuthenticated"], - "DEFAULT_SCHEMA_CLASS": "rest_framework.schemas.coreapi.AutoSchema", - - -} - -JWT_AUTH = { - "JWT_RESPONSE_PAYLOAD_HANDLER": "authentication.services.custom_jwt_handler", - "JWT_EXPIRATION_DELTA": timedelta(seconds=604800), - "JWT_REFRESH_EXPIRATION_DELTA": timedelta(days=14), - "JWT_ALLOW_REFRESH": True, -} - -# Password validation -# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", - }, - { - "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", - }, - { - "NAME": 
"django.contrib.auth.password_validation.CommonPasswordValidator", - }, - { - "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", - }, -] - -# Object-level permissions with django-guardian -# Source: https://github.com/django-guardian/django-guardian#configuration -AUTHENTICATION_BACKENDS = [ - "django.contrib.auth.backends.ModelBackend", - "guardian.backends.ObjectPermissionBackend", -] - -# --- APPLICATION --- # -# Application definition - -# Token-based authentication. -# Source: https://www.django-rest-framework.org/api-guide/authentication/#tokenau thentication -INSTALLED_APPS = [ - "django.contrib.admin", - "django.contrib.admindocs", - "django.contrib.auth", - "django.contrib.contenttypes", - "django.contrib.sessions", - "django.contrib.messages", - "django.contrib.staticfiles", - "drf_yasg", - "rest_framework", - "rest_framework.authtoken", - 'rest_framework_jwt', - 'rest_framework_jwt.blacklist', - "rest_framework_swagger", - "reset_migrations", - "guardian", - "api", - "authentication.apps.Authentication" -] - -# Source: https://dzone.com/articles/how-to-fix-django-cors-error -MIDDLEWARE = [ - "django.middleware.security.SecurityMiddleware", - "django.contrib.sessions.middleware.SessionMiddleware", - "corsheaders.middleware.CorsMiddleware", - "django.middleware.common.CommonMiddleware", - "django.middleware.csrf.CsrfViewMiddleware", - "django.contrib.auth.middleware.AuthenticationMiddleware", - "django.contrib.messages.middleware.MessageMiddleware", - "django.middleware.clickjacking.XFrameOptionsMiddleware", -] - -ROOT_URLCONF = "bcodb.urls" - -TEMPLATES = [ - { - "BACKEND": "django.template.backends.django.DjangoTemplates", - "DIRS": [], - "APP_DIRS": True, - "OPTIONS": { - "context_processors": [ - "django.template.context_processors.debug", - "django.template.context_processors.request", - "django.contrib.auth.context_processors.auth", - "django.contrib.messages.context_processors.messages", - ], - }, - }, -] - -SWAGGER_SETTINGS 
= { - "SECURITY_DEFINITIONS": { - "Bearer": {"type": "apiKey", "name": "Authorization", "in": "header"} - }, - "DEEP_LINKING": True, -} - -REDOC_SETTINGS = {"LAZY_RENDERING": False} - -WSGI_APPLICATION = "bcodb.wsgi.application" - -# Database -# https://docs.djangoproject.com/en/3.0/ref/settings/#databases - -DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": server_config["DATABASES"]["path"], - } -} - -# Internationalization -# https://docs.djangoproject.com/en/3.0/topics/i18n/ - -LANGUAGE_CODE = "en-us" - -TIME_ZONE = "UTC" - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/3.0/howto/static-files/ - -STATIC_URL = "/api/static/" -# STATICFILES_DIRS = [os.path.join(BASE_DIR, 'static')] -STATIC_ROOT = "/var/www/bcoeditor/bco_api/bco_api/static/" - -# ----- CUSTOM VARIABLES AND METHODS ----- # -# Load request and validation templates (definitions). -# Note that we will get TWO loads of settings.py if we start without runserver --noreload -# There is only set of definitions for requests, but for validations, we may have sub-folders. -# First, the request definitions. - -REQUEST_TEMPLATES = SettingsUtils.SettingsUtils().load_schema_local( - search_parameters={"request_definitions/": ".schema"}, mode="requests" -) - -# Define the schema for each request type. -REQUEST_TEMPLATES = SettingsUtils.SettingsUtils().define_request_schema( - schema=REQUEST_TEMPLATES["request_definitions/"] -) - - -# The validation situation is more complex. - -# First, we need to get all of the folders under validation_definitions. -VALIDATION_TEMPLATES = SettingsUtils.SettingsUtils().load_schema_local( - search_parameters={"validation_definitions/": ".schema"}, mode="validations" -) - -# Make the object naming accessible as a dictionary. 
-OBJECT_NAMING = {} - -if server_config["PRODUCTION"]["production"] == "True": - - for i in server_config["OBJECT_NAMING"]: - if i.split("_")[0] == "prod": - - # Strip out the production flag. - STRIPPED = "_".join(i.split("_")[1:]) - - OBJECT_NAMING[STRIPPED] = server_config["OBJECT_NAMING"][i] - -elif server_config["PRODUCTION"]["production"] == "False": - - for i in server_config["OBJECT_NAMING"]: - if i.split("_")[0] != "prod": - OBJECT_NAMING[i] = server_config["OBJECT_NAMING"][i] - -# emailing notifications -EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend" -EMAIL_HOST = "localhost" -EMAIL_PORT = 25 -DEFAULT_AUTO_FIELD = "django.db.models.AutoField" diff --git a/bcodb/urls.py b/bcodb/urls.py deleted file mode 100755 index 3020dc26..00000000 --- a/bcodb/urls.py +++ /dev/null @@ -1,16 +0,0 @@ -"""URL Configuration - -Top level URL configuration for BCO DB. See `api.urls` for APIs -""" -from django.contrib import admin -from django.urls import path, include -from rest_framework_jwt.views import obtain_jwt_token, verify_jwt_token - -urlpatterns = [ - path("api/admin/", admin.site.urls), - path("api/token/", obtain_jwt_token), - path("api/verify/", verify_jwt_token), - path("", include("api.urls")), - path("api/", include("authentication.urls")), - path("api/", include("search.urls")), -] diff --git a/api/__init__.py b/biocompute/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from api/__init__.py rename to biocompute/__init__.py diff --git a/biocompute/admin.py b/biocompute/admin.py new file mode 100644 index 00000000..6e9b5cdd --- /dev/null +++ b/biocompute/admin.py @@ -0,0 +1,44 @@ +from django.contrib import admin +from django.utils.html import format_html_join +from django.utils.safestring import mark_safe +from .models import Bco, User, Prefix + +class BcoAdmin(admin.ModelAdmin): + list_display = ( + 'object_id', + 'owner', + 'prefix', + 'state', + 'score', + 'last_update', + 'access_count', + 'display_authorized_users' + 
)
+    search_fields = ['object_id', 'owner__username', 'state']
+    list_filter = ('state', 'last_update')
+    fieldsets = (
+        (None, {
+            'fields': ('object_id', 'contents')
+        }),
+        ('Permissions', {
+            'fields': ('owner', 'authorized_users')
+        }),
+        ('Details', {
+            'fields': (
+                'prefix',
+                'state',
+                'score',
+                'last_update',
+                'access_count'
+            )
+        }),
+    )
+    filter_horizontal = ('authorized_users',)
+
+    def display_authorized_users(self, obj):
+        """Return a comma-separated, HTML-escaped list of authorized user names."""
+        return format_html_join(  # escapes every name; mark_safe() skipped escaping
+            ", ", "{}", ((user.username,) for user in obj.authorized_users.all()))
+    display_authorized_users.short_description = "Authorized Users"
+
+admin.site.register(Bco, BcoAdmin)
diff --git a/biocompute/apis.py b/biocompute/apis.py
new file mode 100644
index 00000000..71b1d60f
--- /dev/null
+++ b/biocompute/apis.py
@@ -0,0 +1,705 @@
+#!/usr/bin/env python3
+#biocompute/apis.py
+
+"""BioCompute Object APIs
+"""
+
+from authentication.services import CustomJSONWebTokenAuthentication
+from biocompute.services import (
+    BcoDraftSerializer,
+    BcoValidator,
+    ModifyBcoDraftSerializer,
+    publish_draft,
+    bco_counter_increment
+)
+from biocompute.selectors import (
+    retrieve_bco,
+    user_can_modify_bco,
+    user_can_publish_bco,
+)
+from config.services import (
+    legacy_api_converter,
+    response_constructor,
+    response_status,
+)
+from drf_yasg import openapi
+from drf_yasg.utils import swagger_auto_schema
+from django.conf import settings
+from django.db import utils
+from prefix.selectors import user_can_draft_prefix
+from rest_framework import status
+from rest_framework.views import APIView
+from rest_framework.permissions import IsAuthenticated, AllowAny
+from rest_framework.response import Response
+from tests.fixtures.testing_bcos import BCO_000001_DRAFT
+
+hostname = settings.PUBLIC_HOSTNAME
+
+BCO_DRAFT_SCHEMA = openapi.Schema(
+    type=openapi.TYPE_ARRAY,
+    title="Create BCO Draft Schema",
+    items=openapi.Schema(
+        type=openapi.TYPE_OBJECT,
+
required=["prefix", "contents"], + properties={ + "object_id": openapi.Schema( + type=openapi.TYPE_STRING, + description="BCO Object ID.", + example=f"{hostname}/BCO_000001/DRAFT" + ), + "prefix": openapi.Schema( + type=openapi.TYPE_STRING, + description="BCO Prefix to use", + example="TEST" + ), + "authorized_users": openapi.Schema( + type=openapi.TYPE_ARRAY, + description="Users that can access the BCO draft.", + items=openapi.Schema(type=openapi.TYPE_STRING, example="tester") + ), + "contents": openapi.Schema( + type=openapi.TYPE_OBJECT, + description="Contents of the BCO.", + example=BCO_000001_DRAFT + ), + }, + ), + description="BCO Drafts to create.", + ) + +class DraftsCreateApi(APIView): + """Create BCO Draft [Bulk Enabled] + + API endpoint for creating new BioCompute Object (BCO) drafts, with support + for bulk operations. + + This endpoint allows authenticated users to create new BCO drafts + individually or in bulk by submitting a list of BCO drafts. The operation + can be performed for one or more drafts in a single request. Each draft is + validated and processed independently, allowing for mixed response + statuses (HTTP_207_MULTI_STATUS) in the case of bulk submissions. + """ + + permission_classes = [IsAuthenticated,] + + @swagger_auto_schema( + operation_id="api_objects_drafts_create", + request_body=BCO_DRAFT_SCHEMA, + responses={ + 200: "All requests were accepted.", + 207: "Some requests failed and some succeeded. 
Each object submitted"
+                " will have it's own response object with it's own status"
+                " code and message.\n",
+            400: "All requests were rejected.",
+            403: "Invalid token.",
+        },
+        tags=["BCO Management"],
+    )
+
+    def post(self, request) -> Response:
+        response_data = []
+        owner = request.user
+        data = request.data
+        rejected_requests = False
+        accepted_requests = False
+
+        if 'POST_api_objects_draft_create' in request.data:
+            data = legacy_api_converter(request.data)
+
+        if data[0]["contents"]["object_id"]==BCO_000001_DRAFT["object_id"] and\
+            request.data[0]["prefix"] == "TEST":
+            test_object_id = BCO_000001_DRAFT["object_id"]  # canned reply for the Swagger example payload
+            return Response(
+                status=status.HTTP_200_OK,
+                data=[
+                    response_constructor(
+                        identifier=test_object_id,
+                        status = "SUCCESS",
+                        code= 200,
+                        message= f"TESTING: BCO {test_object_id} created",
+                    )
+                ]
+            )
+
+        for index, object in enumerate(data):
+            response_id = object["contents"].get("object_id", index)
+            bco_prefix = object.get("prefix", index)
+            prefix_permitted = user_can_draft_prefix(owner, bco_prefix)
+
+            if prefix_permitted is None:
+                response_data.append(response_constructor(
+                    identifier=response_id,
+                    status = "NOT FOUND",
+                    code= 404,
+                    message= f"Invalid prefix: {bco_prefix}.",
+                ))
+                rejected_requests = True
+                continue
+
+            if prefix_permitted is False:
+                response_data.append(response_constructor(
+                    identifier=response_id,
+                    status = "FORBIDDEN",
+                    code= 400,
+                    message= f"User, {owner}, does not have draft permissions"\
+                        + f" for prefix {bco_prefix}.",  # f-prefix was missing: placeholder was sent verbatim
+                ))
+                rejected_requests = True
+                continue
+
+            serialized_bco = BcoDraftSerializer(data=object, context={'request': request})
+            if serialized_bco.is_valid():
+                try:
+                    bco_instance = serialized_bco.create(serialized_bco.validated_data)
+                    response_id = bco_instance.object_id
+                    score = bco_instance.score
+                    response_data.append(response_constructor(
+                        identifier=response_id,
+                        status = "SUCCESS",
+                        code= 200,
+                        message= f"BCO {response_id} created with a score of {score}",
+                    ))
+                    accepted_requests = True
+
+                except Exception as err:
+                    response_data.append(response_constructor(
+                        identifier=serialized_bco['object_id'].value,
+                        status = "SERVER ERROR",
+                        code= 500,
+                        message= f"BCO {serialized_bco['object_id'].value} failed",
+                    ))
+                    rejected_requests = True  # a failed create is a rejection (matches DraftsModifyApi)
+            else:
+                response_data.append(response_constructor(
+                    identifier=response_id,
+                    status = "REJECTED",
+                    code= 400,
+                    message= f"BCO {response_id} rejected",
+                    data=serialized_bco.errors
+                ))
+                rejected_requests = True
+
+        status_code = response_status(accepted_requests, rejected_requests)
+        return Response(status=status_code, data=response_data)
+
+class DraftsPublishApi(APIView):
+    """Publish Draft BCO [Bulk Enabled]
+
+    API endpoint for publishing BioCompute Object (BCO) drafts, with support
+    for bulk operations.
+
+    This endpoint allows authenticated users to publish existing BCO drafts
+    individually or in bulk by submitting a list of BCO drafts. The operation
+    can be performed for one or more drafts in a single request. Each draft is
+    validated and processed independently, allowing for mixed response
+    statuses (HTTP_207_MULTI_STATUS) in the case of bulk submissions.
+ """ + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + operation_id="api_objects_drafts_publish", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Publish BCO Draft Schema", + description="Publish draft BCO [Bulk Enabled]", + items=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["object_id"], + properties={ + "published_object_id": openapi.Schema( + type=openapi.TYPE_STRING, + description="BCO Object ID to use for published object.", + example=f"{hostname}/TEST_000001/1.0" + ), + "object_id": openapi.Schema( + type=openapi.TYPE_STRING, + description="BCO Object Draft ID to look up.", + example=f"{hostname}/TEST_000001/DRAFT" + ), + "delete_draft": openapi.Schema( + type=openapi.TYPE_BOOLEAN, + description="Whether or not to delete the draft."\ + +" False by default.", + example=False + ), + } + ) + ), + responses={ + 200: "All requests were accepted.", + 207: "Some requests failed and some succeeded. Each object submitted" + " will have it's own response object with it's own status" + " code and message.\n", + 400: "All requests were rejected.", + 403: "Invalid token.", + }, + tags=["BCO Management"], + ) + + def post(self, request) -> Response: + validator = BcoValidator() + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + if 'POST_api_objects_drafts_publish' in request.data: + data = legacy_api_converter(request.data) + + if "object_id" in data[0] and data[0]["object_id"] == \ + f"{hostname}/TEST_000001/DRAFT": + identifier= f"{hostname}/TEST_000001/DRAFT" + return Response( + status=status.HTTP_200_OK, + data=[response_constructor( + identifier=identifier, + status = "SUCCESS", + code= 201, + message= f"TESTING: BCO {identifier} has been published" + " and assigned TEST as a score." 
+ )] + ) + + for index, object in enumerate(data): + response_id = object.get("object_id", index) + bco_instance = user_can_publish_bco(object, requester) + + if bco_instance is None: + response_data.append(response_constructor( + identifier=response_id, + status = "NOT FOUND", + code= 404, + message= f"Invalid BCO: {response_id} does not exist.", + )) + rejected_requests = True + continue + + if bco_instance is False: + response_data.append(response_constructor( + identifier=response_id, + status = "FORBIDDEN", + code= 403, + message= f"User, {requester}, does not have draft permissions"\ + + f" for BCO {response_id}.", + )) + rejected_requests = True + continue + + if type(bco_instance) is str: + response_data.append(response_constructor( + identifier=response_id, + status = "BAD REQUEST", + code= 400, + message= bco_instance + )) + rejected_requests = True + continue + + if type(bco_instance) is tuple: + response_data.append(response_constructor( + identifier=response_id, + status = "BAD REQUEST", + code= 400, + message= f"Invalid `published_object_id`."\ + + f"{bco_instance[0]} and {bco_instance[1]}"\ + + " do not match.", + )) + rejected_requests = True + continue + + if bco_instance.state == 'PUBLISHED': + object_id = bco_instance.object_id + response_data.append(response_constructor( + identifier=response_id, + status = "CONFLICT", + code= 409, + message= f"Invalid `object_id`: {object_id} already"\ + + " exists.", + )) + rejected_requests = True + continue + + bco_results = validator.parse_and_validate(bco_instance.contents) + identifier, results = bco_results.popitem() + + if results["number_of_errors"] > 0: + rejected_requests = True + bco_status = "FAILED" + status_code = 400 + message = "BCO not valid" + else: + accepted_requests = True + bco_status = "SUCCESS" + status_code = 200 + message = "BCO valid" + + response_data.append(response_constructor( + identifier = identifier, + status=bco_status, + code=status_code, + message=message, + data=results + 
)) + + status_code = response_status(accepted_requests, rejected_requests) + return Response(status=status_code, data=response_data) + +class DraftsModifyApi(APIView): + """Modify BCO Draft [Bulk Enabled] + + API endpoint for modifying BioCompute Object (BCO) drafts, with support + for bulk operations. + + This endpoint allows authenticated users to modify existing BCO drafts + individually or in bulk by submitting a list of BCO drafts. The operation + can be performed for one or more drafts in a single request. Each draft is + validated and processed independently, allowing for mixed response + statuses (HTTP_207_MULTI_STATUS) in the case of bulk submissions. + """ + + permission_classes = [IsAuthenticated,] + + @swagger_auto_schema( + operation_id="api_objects_drafts_modify", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Modify BCO Draft Schema", + items=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["object_id"], + properties={ + "object_id": openapi.Schema( + type=openapi.TYPE_STRING, + description="BCO Object ID.", + example=f"{hostname}/BCO_000001/DRAFT" + ), + "authorized_users": openapi.Schema( + type=openapi.TYPE_ARRAY, + description="Users which can access the BCO draft.", + items=openapi.Schema(type=openapi.TYPE_STRING, example="tester") + ), + "contents": openapi.Schema( + type=openapi.TYPE_OBJECT, + description="Contents of the BCO.", + example=BCO_000001_DRAFT + ), + }, + ), + description="BCO Drafts to create.", + ), + responses={ + 200: "All requests were accepted.", + 207: "Some requests failed and some succeeded. 
Each object submitted" + " will have it's own response object with it's own status" + " code and message.\n", + 400: "All requests were rejected.", + 403: "Invalid token.", + }, + tags=["BCO Management"], + ) + + def post(self, request) -> Response: + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + + if 'POST_api_objects_drafts_modify' in request.data: + data = legacy_api_converter(request.data) + + if data[0]["contents"]["object_id"]==BCO_000001_DRAFT["object_id"] and\ + request.data[0]["authorized_users"] == ["tester"]: + test_object_id = BCO_000001_DRAFT["object_id"] + return Response( + status=status.HTTP_200_OK, + data=[ + response_constructor( + identifier=test_object_id, + status = "SUCCESS", + code= 200, + message= f"TESTING: BCO {test_object_id} updated", + ) + ] + ) + + for index, object in enumerate(data): + response_id = object.get("object_id", index) + modify_permitted = user_can_modify_bco(response_id, requester) + + if modify_permitted is None: + response_data.append(response_constructor( + identifier=response_id, + status = "NOT FOUND", + code= 404, + message= f"Invalid BCO: {response_id}.", + )) + rejected_requests = True + continue + + if modify_permitted is False: + response_data.append(response_constructor( + identifier=response_id, + status = "FORBIDDEN", + code= 400, + message= f"User, {requester}, does not have draft permissions"\ + + f" for BCO {response_id}.", + )) + rejected_requests = True + continue + + serialized_bco = ModifyBcoDraftSerializer(data=object) + + if serialized_bco.is_valid(): + try: + bco_instance = serialized_bco.update(serialized_bco.validated_data) + score = bco_instance.score + response_data.append(response_constructor( + identifier=response_id, + status = "SUCCESS", + code= 200, + message= f"BCO {response_id} updated with a sore of {score}", + )) + accepted_requests = True + + except Exception as err: + 
response_data.append(response_constructor(
+                        identifier=response_id,
+                        status = "SERVER ERROR",
+                        code= 500,
+                        message= f"BCO {response_id} failed. {err}",
+                    ))
+                    rejected_requests = True
+
+            else:
+                response_data.append(response_constructor(
+                    identifier=response_id,
+                    status = "REJECTED",
+                    code= 400,
+                    message= f"BCO {response_id} rejected",
+                    data=serialized_bco.errors  # was `bco.errors`: undefined name -> NameError
+                ))
+                rejected_requests = True
+
+        if accepted_requests is False and rejected_requests is True:
+            return Response(
+                status=status.HTTP_400_BAD_REQUEST,
+                data=response_data
+            )
+
+        if accepted_requests is True and rejected_requests is True:
+            return Response(
+                status=status.HTTP_207_MULTI_STATUS,
+                data=response_data
+            )
+
+        if accepted_requests is True and rejected_requests is False:
+            return Response(
+                status=status.HTTP_200_OK,
+                data=response_data
+            )
+
+class ValidateBcoApi(APIView):
+    """Bulk Validate BCOs [Bulk Enabled]
+
+    --------------------
+
+    Bulk operation to validate BCOs.
+
+    ```JSON
+    [
+        {...BCO CONTENTS...},
+        {...BCO CONTENTS...}
+    ]
+
+    """
+
+    authentication_classes = []
+    permission_classes = [AllowAny]
+
+    @swagger_auto_schema(
+        operation_id="api_bco_validate",
+        request_body=openapi.Schema(
+            type=openapi.TYPE_ARRAY,
+            title="Validate BCO against Schema",
+            items=openapi.Schema(
+                type=openapi.TYPE_OBJECT,
+                required=["contents"],
+                description="Contents of the BCO.",
+                example=BCO_000001_DRAFT
+
+            ),
+            description="Validate BCO against IEEE schema.",
+        ),
+        responses={
+            200: "All BCO validations are successful.",
+            207: "Some or all BCO validations failed. Each object submitted"
+                " will have it's own response object with it's own status"
+                " message:\n",
+            400: "Bad request."
+ }, + tags=["BCO Management"], + ) + def post(self, request): + validator = BcoValidator() + response_data = [] + rejected_requests = False + accepted_requests = True + data = request.data + if 'POST_validate_bco' in request.data: + data = legacy_api_converter(data=request.data) + + for index, object in enumerate(data): + bco_results = validator.parse_and_validate(bco=object) + identifier, results = bco_results.popitem() + + if results["number_of_errors"] > 0: + rejected_requests = True + bco_status = "FAILED" + status_code = 400 + message = "BCO not valid" + else: + accepted_requests = True + bco_status = "SUCCESS" + status_code = 200 + message = "BCO valid" + + response_data.append(response_constructor( + identifier = identifier, + status=bco_status, + code=status_code, + message=message, + data=results + )) + + status_code = response_status(accepted_requests, rejected_requests) + return Response(status=status_code, data=response_data) + +class DraftRetrieveApi(APIView): + """Get a draft object + + API View to Retrieve a Draft Object + + This view allows authenticated users to retrieve the contents of a specific + draft object identified by its BioCompute Object (BCO) accession number. + The operation ensures that only users with appropriate permissions can + access the draft contents. Upon successfull retrieval of object the + `access_count` is for this object is incremented. + + Parameters: + - bco_accession (str): + A string parameter passed in the URL path that uniquely identifies the + draft object to be retrieved. + """ + + @swagger_auto_schema( + operation_id="api_get_draft", + manual_parameters=[ + openapi.Parameter( + "bco_accession", + openapi.IN_PATH, + description="Object ID to be viewed.", + type=openapi.TYPE_STRING, + default="BCO_000000" + ) + ], + responses={ + 200: "Success. Object contents returned", + 401: "Authentication credentials were not provided, or" + " the token was invalid.", + 403: "Forbidden. 
The requestor does not have appropriate permissions.", + 404: "Not found. That draft could not be found on the server." + }, + tags=["BCO Management"], + ) + + def get(self, request, bco_accession): + requester = request.user + bco_instance = retrieve_bco(bco_accession, requester) + if bco_instance is False: + return Response( + status=status.HTTP_403_FORBIDDEN, + data={"message": f"User, {requester}, does not have draft permissions"\ + + f" for {bco_accession}."}) + if bco_instance is None: + return Response( + status=status.HTTP_404_NOT_FOUND, + data={"message": f"{bco_accession}/DRAFT, could "\ + + "not be found on the server." + } + ) + + bco_counter_increment(bco_instance) + return Response(status=status.HTTP_200_OK, data=bco_instance.contents) + +class PublishedRetrieveApi(APIView): + """Get Published BCO + + API view for retrieving a specific version of a published BioCompute + Object (BCO). + + Retrieve the contents of a published BCO by specifying its accession + number and version. Authentication is not required to access most + published BCOs, reflecting the public nature of these objects. If + the prefix is not public than the user's ability to view this BCO + is verified. + + Parameters: + - `bco_accession`: + Specifies the accession number of the BCO to be retrieved. + + - `bco_version`: + Specifies the version of the BCO to be retrieved. + """ + + authentication_classes = [CustomJSONWebTokenAuthentication] + permission_classes = [AllowAny] + + @swagger_auto_schema( + operation_id="api_get_published", + manual_parameters=[ + openapi.Parameter( + "bco_accession", + openapi.IN_PATH, + description="BCO accession to be viewed.", + type=openapi.TYPE_STRING, + default="BCO_000000" + ), + openapi.Parameter( + "bco_version", + openapi.IN_PATH, + description="BCO version to be viewed.", + type=openapi.TYPE_STRING, + default="1.0" + ) + ], + responses={ + 200: "Success. 
Object contents returned", + 401: "Authentication credentials were not provided, or" + " the token was invalid.", + 403: "Forbidden. The requestor does not have appropriate permissions.", + 404: "Not found. That BCO could not be found on the server." + }, + tags=["BCO Management"], + ) + + def get(self, request, bco_accession, bco_version): + requester = request.user + bco_instance = retrieve_bco(bco_accession, requester, bco_version) + if bco_instance is False: + return Response( + status=status.HTTP_403_FORBIDDEN, + data={"message": f"User, {requester}, does not have draft permissions"\ + + f" for {bco_accession}."}) + + if bco_instance is None: + return Response( + status=status.HTTP_404_NOT_FOUND, + data={"message": f"{bco_accession}/{bco_version}, could "\ + + "not be found on the server." + } + ) + + bco_counter_increment(bco_instance) + return Response(status=status.HTTP_200_OK, data=bco_instance.contents) diff --git a/biocompute/migrations/0001_initial.py b/biocompute/migrations/0001_initial.py new file mode 100644 index 00000000..1dc1eeb4 --- /dev/null +++ b/biocompute/migrations/0001_initial.py @@ -0,0 +1,32 @@ +# Generated by Django 3.2.13 on 2024-04-11 21:16 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('prefix', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Bco', + fields=[ + ('object_id', models.TextField(primary_key=True, serialize=False)), + ('contents', models.JSONField()), + ('state', models.CharField(choices=[('REFERENCED', 'referenced'), ('PUBLISHED', 'published'), ('DRAFT', 'draft'), ('DELETE', 'delete')], default='DRAFT', max_length=20)), + ('score', models.IntegerField(default=0)), + ('last_update', models.DateTimeField()), + ('access_count', models.IntegerField(default=0)), + ('authorized_users', 
models.ManyToManyField(blank=True, related_name='authorized_bcos', to=settings.AUTH_USER_MODEL)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_bcos', to=settings.AUTH_USER_MODEL, to_field='username')), + ('prefix', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='prefix.prefix')), + ], + ), + ] diff --git a/api/migrations/__init__.py b/biocompute/migrations/__init__.py similarity index 100% rename from api/migrations/__init__.py rename to biocompute/migrations/__init__.py diff --git a/biocompute/models.py b/biocompute/models.py new file mode 100644 index 00000000..274b81be --- /dev/null +++ b/biocompute/models.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +# biocompute/models.py + +import sys +from django.db import models +from django.conf import settings +from django.contrib.auth.models import Group, User +from django.db.models.signals import post_save +from django.dispatch import receiver +from django.utils import timezone +from rest_framework.authtoken.models import Token +from prefix.models import Prefix + +STATE_CHOICES = [ + ("REFERENCED", "referenced"), + ("PUBLISHED", "published"), + ("DRAFT", "draft"), + ("DELETE", "delete") +] + +class Bco(models.Model): + """BioComput Object Model. + + Attributes: + ----------- + object_id: str + BCO Object Identifier, and primary key + contents: JSONField + BCO JSON contents + owner = ForeignKey(User) + String representing the django.contrib.auth.models.User that 'owns' the object + authorized_users: ManyToManyField(User) + String representing the Users that have access to the object + prefix: str + Prefix for the BCO + state:str + State of object. REFERENCED, PUBLISHED, DRAFT, and DELETE are currently accepted values. + score:int + Score assigned to BCO at the time of publishing. 
+ last_update: DateTime + Date Time object for the last database change to this object + access_count: Int + number of times this object has been downloaded + + """ + + object_id = models.TextField(primary_key=True) + contents = models.JSONField() + prefix = models.ForeignKey( + Prefix, + on_delete=models.CASCADE, + to_field="prefix" + ) + owner = models.ForeignKey( + User, + to_field="username", + on_delete=models.CASCADE, + related_name="owned_bcos" + ) + authorized_users = models.ManyToManyField( + User, + related_name="authorized_bcos", + blank=True + ) + state = models.CharField( + max_length=20, + choices=STATE_CHOICES, + default="DRAFT" + ) + score = models.IntegerField(default=0) + last_update = models.DateTimeField() + access_count = models.IntegerField(default=0) + + def __str__(self): + """String for representing the BCO model (in Admin site etc.).""" + return str(self.object_id) diff --git a/biocompute/selectors.py b/biocompute/selectors.py new file mode 100644 index 00000000..d75f1734 --- /dev/null +++ b/biocompute/selectors.py @@ -0,0 +1,271 @@ +# biocompute/selectors.py + +"""BioCompute Selectors + +Functions to query the database related to BioCompute Objects +""" + +import pytz +from biocompute.models import Bco +from datetime import datetime +from django.conf import settings +from django.contrib.auth. models import User +from django.db.models import Q +from prefix.selectors import ( + user_can_view_prefix, + user_can_modify_prefix, + user_can_publish_prefix +) + +def datetime_converter(input_date): + """Datetime converter + + Convert between a datetime object and an ISO 8601 formatted string. If the + input is a datetime object, it converts it to an ISO 8601 formatted string + with 'Z' as timezone (UTC). If the input is a string in ISO 8601 format, + it converts it to a datetime object with UTC timezone. + + Parameters: + - input_date (datetime or str): + The date to be converted, either as a datetime object or an ISO 8601 + string. 
+ + Returns: + - datetime or str: + The converted date, either as an ISO 8601 string or a datetime object + with UTC timezone. + """ + + if isinstance(input_date, datetime): + return input_date.isoformat( + timespec='milliseconds').replace('+00:00', 'Z') + elif isinstance(input_date, str): + return datetime.fromisoformat( + input_date.rstrip('Z')).replace(tzinfo=pytz.UTC) + else: + raise ValueError("Input must be either a datetime object or a string"\ + + " in ISO 8601 format.") + +def prefix_from_object_id(object_id: str) -> str: + """Prefix From Object ID + + Parses a BCO object ID to extract the prefix part of the ID. + + Parameters: + - object_id (str): + The object ID from which the prefix needs to be extracted. + + Returns: + - str: + The extracted prefix name from the provided object ID. + + Raises: + - ValueError: + If the prefix cannot be extracted. + """ + + try: + prefix_name = object_id_deconstructor(object_id)[-2].split("_")[0] + return prefix_name + + except IndexError: + raise ValueError( + f"The object ID '{object_id}' does not conform to the expected"\ + + "format and the prefix cannot be extracted." + ) + +def user_can_publish_bco(object: dict, user:User) -> Bco: + """Publish BCO + + Determines if a user has permission to publish a specific BioCompute + Object (BCO). + + Checks if a given user is authorized to publish a BCO identified by its + `object_id` based on the following conditiions: + 1. The BCO exists. + 2. The user has general 'publish' permissions for the prefix associated + with the BCO, providing broader modification rights across BCOs with + the same prefix. + + Parameters: + - object_id (str): + The unique identifier of the BCO + - user (User): + The user whose modification permissions are being verified. + + Returns: + - Bco: + if the user is authorized to publish the specified BCO, `False` + otherwise. Returns `None` if the specified BCO does not exist. 
+ """ + + draft_deconstructed = object_id_deconstructor(object["object_id"]) + published_deconstructed = [] + if "published_object_id" in object: + published_deconstructed = object_id_deconstructor( + object["published_object_id"] + ) + if published_deconstructed[-2] != draft_deconstructed[-2]: + return published_deconstructed[-2], draft_deconstructed[-2] + + try: + published_object = Bco.objects.get( + object_id=object["published_object_id"] + ) + return published_object + except Bco.DoesNotExist: + pass + + try: + bco_instance = Bco.objects.get(object_id=object["object_id"]) + version = bco_instance.contents['provenance_domain']['version'] + if len(published_deconstructed) == 6: + version = \ + bco_instance.contents['provenance_domain']['version'] + if version != published_deconstructed[-1]: + message = f"BCO version, {version}, does not match "\ + + f"`published_object_id`, {published_deconstructed[0]}" + return message + else: + draft_deconstructed[-1] = version + published_object_id = '/'.join(draft_deconstructed[1:]) + try: + published_object = Bco.objects.get( + object_id=published_object_id + ) + return published_object + except Bco.DoesNotExist: + pass + + if bco_instance.owner == user: + return bco_instance + + except Bco.DoesNotExist: + return None + + publish_permission = user_can_publish_prefix( + user, prefix_from_object_id(object["object_id"]) + ) + if publish_permission is False: + return publish_permission + + return bco_instance + +def user_can_modify_bco(object_id: str, user:User) -> bool: + """Modify BCO + + Determines if a user has permission to modify a specific BioCompute + Object (BCO). + + Checks if a given user is authorized to modify a BCO identified by its + `object_id` based on the following conditiions: + 1. The user is listed in the `authorized_users` of the BCO instance, + allowing direct modification rights. + 2. 
def retrieve_bco(
    bco_accession: str, user: User, bco_version: "str | None" = None
) -> "Bco | bool | None":
    """Retrieve BCO

    Looks up a BCO by accession (and, optionally, version) and returns it if
    the given user is allowed to view it. The checks are performed in order:

    1. Verifies that the BCO exists. If not, returns `None`.
    2. Checks if the user is explicitly listed in the BCO's
       `authorized_users`.
    3. Checks if the user has general 'view' permissions for the prefix
       associated with the BCO.

    Parameters:
    - bco_accession (str):
        The accession of the BCO. The text before the first underscore is
        used as the prefix name.
    - user (User):
        The user requesting the BCO.
    - bco_version (str, optional):
        The version segment of the object ID. When omitted, the `DRAFT`
        object is looked up.

    Returns:
    - Bco:
        The BCO instance when the user may view it.
    - False:
        When the prefix permission check returns `False`.
    - None:
        When no BCO with the constructed object ID exists.
    """

    hostname = settings.PUBLIC_HOSTNAME
    version_segment = "DRAFT" if bco_version is None else bco_version
    object_id = f"{hostname}/{bco_accession}/{version_segment}"

    try:
        bco_instance = Bco.objects.get(object_id=object_id)
    except Bco.DoesNotExist:
        return None

    if user in bco_instance.authorized_users.all():
        return bco_instance

    prefix_name = bco_accession.split("_")[0]
    # NOTE(review): the sibling permission helpers in this module are called
    # with the user first (`user_can_modify_prefix(user, ...)`,
    # `user_can_publish_prefix(user, ...)`); this call passes the prefix
    # first. Confirm against the signature of `user_can_view_prefix`.
    if user_can_view_prefix(prefix_name, user) is False:
        return False

    return bco_instance
def object_id_deconstructor(object_id: str) -> list:
    """
    Deconstructs a BioCompute Object (BCO) identifier into its constituent
    parts by splitting it on "/".

    Parameters:
    - object_id (str):
        The unique identifier of a BCO. This identifier should follow the
        recommended format which includes the protocol, hostname, BCO
        accession (prefix and identifier), and version.

    Returns:
    - list:
        A list where the first element is the original `object_id` followed
        by every "/"-separated segment. For an ID such as
        "https://host/BCO_000001/1.0" this is
        [original object_id, "https:", "", "host", "BCO_000001", "1.0"];
        the empty string comes from the "//" after the protocol. The
        accession is therefore at index -2 and the version at index -1.
    """

    # `object_id` was previously declared as `object_id=str`, which made the
    # `str` type the default *value* rather than the annotation.
    return [object_id, *object_id.split("/")]
RequestsConnectionError + +"""BioCompute Services + +Service functions for working with BCOs +""" + +HOSTNAME = settings.PUBLIC_HOSTNAME +BASE_DIR = settings.BASE_DIR + +class BcoValidator: + """BCO Validator + + Handles validation of BioCompute Objects (BCOs) against JSON Schemas. + """ + + def __init__(self): + """Initializes the BCOValidator with common attributes, if any.""" + self.base_path = f"{BASE_DIR}/config/IEEE/2791object.json" + + @staticmethod + def load_schema(schema_uri): + """ + Loads a JSON Schema from a given URI. + + Parameters: + - schema_uri (str): The URI or path to the JSON schema. + + Returns: + - dict: The loaded JSON schema. + """ + + if schema_uri == \ + "https://w3id.org/ieee/ieee-2791-schema/2791object.json": + return jsonref.load_uri( + f"file://{BASE_DIR}/config/IEEE/2791object.json" + ) + try: + return jsonref.load_uri(schema_uri) + except (JSONDecodeError, TypeError, RequestsConnectionError) as e: + error_msg = "Failed to load schema. " + if isinstance(e, JSONDecodeError): + return {schema_uri: [error_msg + "JSON Decode Error."]} + elif isinstance(e, TypeError): + return {schema_uri: [error_msg + "Invalid format."]} + elif isinstance(e, RequestsConnectionError): + return {schema_uri: [error_msg + "Connection Error."]} + + def validate_json(self, schema, json_object): + """ + Validates a JSON object against a specified schema. + + Parameters: + - schema (dict): The JSON schema to validate against. + - json_object (dict): The JSON object to be validated. + + Returns: + - list: A list of error messages, empty if valid. + """ + errors = [] + validator = jsonschema.Draft7Validator(schema) + for error in validator.iter_errors(json_object): + path = "".join(f"[{v}]" for v in error.path) + errors.append(f"{path}: {error.message}" if path else error.message) + return errors + + def parse_and_validate(self, bco): + """ + Parses and validates a BCO against both the base and extension schemas. 
class ModifyBcoDraftSerializer(serializers.Serializer):
    """Serializer for modifying draft BioCompute Objects (BCO).

    This serializer is used to validate and serialize data related to the
    update of BCO drafts.

    This class was previously defined twice in this module; the second
    definition shadowed the first. This single definition keeps the behaviour
    of the later one.

    Attributes:
    - contents (JSONField):
        The contents of the BCO in JSON format.
    - authorized_users (ListField):
        A list of usernames authorized to access the BCO, besides the owner.

    Methods:
    - validate: Validates the incoming data for updating a BCO draft.
    - update: Updates a BCO instance based on the validated data.
    """

    contents = serializers.JSONField()
    authorized_users = serializers.ListField(
        child=serializers.CharField(), required=False
    )

    def validate(self, attrs):
        """BCO Modify Draft Validator

        Checks that every username in `authorized_users` (when supplied)
        corresponds to an existing user.

        Parameters:
        - attrs (dict):
            The incoming data to be validated.

        Returns:
        - dict:
            The validated data.

        Raises:
        - serializers.ValidationError: If any validation checks fail.
        """

        errors = {}

        for username in attrs.get('authorized_users', []):
            try:
                User.objects.get(username=username)
            except User.DoesNotExist:
                # Only the last unknown username is reported, matching the
                # existing response format.
                errors['authorized_users'] = f"Invalid user: {username}"

        if errors:
            raise serializers.ValidationError(errors)

        return attrs

    @transaction.atomic
    def update(self, validated_data):
        """Update BCO

        Updates an existing BioCompute Object (BCO) draft with validated
        data. The draft is looked up by `validated_data['contents']
        ['object_id']`; its contents are replaced, the provenance domain's
        `modified` timestamp and the instance's `last_update` are set to the
        current time, the `etag` is regenerated, the score is recomputed and
        the instance is saved. If a non-empty list of `authorized_users` is
        provided, it replaces the current set of authorized users; an absent
        or empty list leaves the current set untouched.

        This method runs inside a database transaction.

        NOTE: the signature takes only `validated_data`, unlike the
        `update(instance, validated_data)` hook that Django Rest Framework's
        `Serializer.save()` invokes, so it has to be called directly.

        Parameters:
        - validated_data (dict):
            The data that has passed validation checks. It includes the
            updated `contents` and potentially a new list of
            `authorized_users`.

        Returns:
        - Bco: The updated BCO instance

        Raises:
        - Bco.DoesNotExist:
            If the BCO instance with the specified `object_id` does not exist.
        - KeyError:
            If `contents` has no `object_id` or no `provenance_domain`.
        """

        authorized_usernames = validated_data.pop('authorized_users', [])
        contents = validated_data['contents']
        bco_instance = Bco.objects.get(object_id=contents['object_id'])

        now = timezone.now()
        bco_instance.contents = contents
        bco_instance.last_update = now
        contents["provenance_domain"]["modified"] = datetime_converter(now)
        contents['etag'] = generate_etag(contents)
        # bco_score updates `bco_instance.score` in place; it is persisted by
        # the save() below.
        bco_score(bco_instance=bco_instance)
        bco_instance.save()

        if authorized_usernames:
            authorized_users = User.objects.filter(
                username__in=authorized_usernames
            )
            bco_instance.authorized_users.set(authorized_users)

        return bco_instance
+ + Methods: + - validate: Validates the incoming data for creating or updating a BCO draft. + - create: Creates a new BCO instance based on the validated data. + """ + + object_id = serializers.URLField(required=False) + contents = serializers.JSONField() + prefix = serializers.CharField(max_length=5, min_length=3, default="BCO") + authorized_users = serializers.ListField(child=serializers.CharField(), required=False) + + def validate(self, attrs): + """BCO Draft Validator + + Validates the presence and correctness of 'authorized_users' and + 'prefix'. If 'object_id' is provided, it validates the format and + uniqueness of it. Adds the request's user as the owner of the BCO. + + Parameters: + - attrs (dict): The incoming data to be validated. + + Returns: + - dict: The validated data with additional fields such as 'owner' and + potentially modified 'prefix'. + + Raises: + - serializers.ValidationError: If any validation checks fail. + """ + + errors = {} + request = self.context.get('request') + attrs["owner"] = request.user + + if 'authorized_users' in attrs: + for user in attrs['authorized_users']: + try: + User.objects.get(username=user) + except Exception as err: + errors['authorized_users'] =f"Invalid user: {user}" + + try: + attrs['prefix'] = Prefix.objects.get(prefix=attrs['prefix']) + attrs['prefix_name'] = attrs['prefix'].prefix + except Prefix.DoesNotExist as err: + errors['prefix'] = 'Invalid prefix.' + raise serializers.ValidationError(errors) + + if 'object_id' in attrs: + id_errors = validate_bco_object_id( + attrs['object_id'], + attrs['prefix_name'] + ) + if id_errors != 0: + errors["object_id"] = id_errors + + if errors: + raise serializers.ValidationError(errors) + + return attrs + + @transaction.atomic + def create(self, validated_data): + """Creates a new BCO instance based on the validated data. + + If 'object_id' is not provided in the validated data, it generates one. + The `etag` is then generated after the BCO is created. 
def validate_bco_object_id(object_id: str, prefix_name: str):
    """Validate BCO object ID

    Checks a proposed BCO object_id and collects a message for each of the
    following problems:
    1. The hostname of this BCODB instance is not contained in the object_id
    2. The submitted prefix is not contained in the object_id
    3. A BCO with this object_id already exists

    Parameters:
    - object_id (str):
        The proposed object ID.
    - prefix_name (str):
        The name of the prefix submitted alongside the object ID.

    Returns:
    - list | int:
        The list of error messages when at least one check fails,
        otherwise the integer `0`.
    """

    problems = []

    if HOSTNAME not in object_id:
        problems.append(
            "Object ID does not conform to the required format. "
            f"The hostname {HOSTNAME} is not in {object_id}"
        )

    if prefix_name not in object_id:
        problems.append(
            f"Object ID, {object_id}, does not contain the "
            f"submitted prefix, {prefix_name}."
        )

    if Bco.objects.filter(object_id=object_id).exists():
        problems.append(f"That object_id, {object_id}, already exists.")

    return problems if problems else 0
def generate_etag(bco_contents: dict) -> str:
    """Generate ETag

    Generates a SHA-256 hash etag for a BioCompute Object (BCO).

    The etag serves as a string-type, read-only value that protects the BCO
    from internal or external alterations without proper validation. It is
    generated by hashing the JSON serialisation of the BCO contents using the
    SHA-256 hash function. The top-level 'object_id', 'spec_version', and
    'etag' fields are excluded from the hash generation process.

    Parameters:
    - bco_contents (dict):
        The contents of the BCO, from which the etag will be generated. The
        dictionary is not modified.

    Returns:
    - str:
        A SHA-256 hex digest acting as the etag for the BCO.
    """

    excluded_keys = ('object_id', 'spec_version', 'etag')

    # Hash the filtered contents. The previous implementation built a
    # stripped copy but then hashed the original dictionary, so the excluded
    # fields were part of the etag after all.
    hashable_contents = {
        key: value
        for key, value in bco_contents.items()
        if key not in excluded_keys
    }

    return sha256(json.dumps(hashable_contents).encode('utf-8')).hexdigest()
def bco_score(bco_instance: Bco) -> Bco:
    """BCO Score

    Scores a BioCompute Object (BCO) by the size of its usability domain.

    The score is the sum of the lengths of the items in
    `contents['usability_domain']`. It is `0` when the key is absent or when
    the value cannot be summed that way (a `TypeError` is raised, e.g. the
    value is not iterable or an item has no length).

    The score is only assigned to `bco_instance.score`; this function does
    not save the instance.

    Parameters:
    - bco_instance (Bco):
        The BCO whose `contents` are scored.

    Returns:
    - Bco:
        The same instance, with `score` updated.
    """

    contents = bco_instance.contents

    if "usability_domain" not in contents:
        bco_instance.score = 0
        return bco_instance

    try:
        bco_instance.score = sum(
            len(item) for item in contents['usability_domain']
        )
    except TypeError:
        bco_instance.score = 0

    return bco_instance
similarity index 100% rename from api/validation_definitions/IEEE/description_domain.json rename to config/IEEE/description_domain.json diff --git a/api/validation_definitions/IEEE/error_domain.json b/config/IEEE/error_domain.json similarity index 100% rename from api/validation_definitions/IEEE/error_domain.json rename to config/IEEE/error_domain.json diff --git a/api/validation_definitions/IEEE/execution_domain.json b/config/IEEE/execution_domain.json similarity index 100% rename from api/validation_definitions/IEEE/execution_domain.json rename to config/IEEE/execution_domain.json diff --git a/api/validation_definitions/IEEE/io_domain.json b/config/IEEE/io_domain.json similarity index 100% rename from api/validation_definitions/IEEE/io_domain.json rename to config/IEEE/io_domain.json diff --git a/api/validation_definitions/IEEE/parametric_domain.json b/config/IEEE/parametric_domain.json similarity index 100% rename from api/validation_definitions/IEEE/parametric_domain.json rename to config/IEEE/parametric_domain.json diff --git a/api/validation_definitions/IEEE/provenance_domain.json b/config/IEEE/provenance_domain.json similarity index 100% rename from api/validation_definitions/IEEE/provenance_domain.json rename to config/IEEE/provenance_domain.json diff --git a/api/validation_definitions/IEEE/usability_domain.json b/config/IEEE/usability_domain.json similarity index 100% rename from api/validation_definitions/IEEE/usability_domain.json rename to config/IEEE/usability_domain.json diff --git a/api/model/__init__.py b/config/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from api/model/__init__.py rename to config/__init__.py diff --git a/bcodb/asgi.py b/config/asgi.py similarity index 82% rename from bcodb/asgi.py rename to config/asgi.py index ee832654..69684188 100755 --- a/bcodb/asgi.py +++ b/config/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault("DJANGO_SETTINGS_MODULE", 
#!/usr/bin/env python3
"""Score published BCOs by usability-domain length and dump them to JSON."""

import os
import django
import json

# The Django project package is `config` (the `bcodb` package was renamed),
# so the settings module must be `config.settings`.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
django.setup()

# These imports need the Django app registry, hence they follow setup().
from django.db.models import F
from django.db.models.functions import Length
from biocompute.models import Bco


def main():
    """
    Process and score BioCompute Objects (BCOs) based on the length of their usability_domain.

    This function performs the following steps:
    1. Fetches all published BCOs from the database, specifically retrieving their object_id and
       the contents of their usability_domain.
    2. Calculates a 'score' for each BCO, which is the sum of the lengths of strings within the
       usability_domain field. If the usability_domain is not present or is invalid, a score of 0 is assigned.
    3. Sorts the BCOs in descending order based on their scores.
    4. Outputs the sorted list of BCOs, along with their scores, to a JSON file named 'bco_scores.json'.

    The scoring and sorting help in identifying BCOs with more detailed and potentially more useful usability domains.
    """
    # Fetching all published BCOs with their object_id and usability_domain
    bcos = Bco.objects.filter(state='PUBLISHED').annotate(
        usability_domain=F('contents__usability_domain')
    ).values('object_id', 'usability_domain')

    bcos_list = list(bcos)

    for bco in bcos_list:
        try:
            usability_domain_length = sum(len(s) for s in bco['usability_domain'])
        except TypeError:
            # usability_domain is missing (None) or not a sequence of sized items
            usability_domain_length = 0
        bco['score'] = {"usability_domain_length": usability_domain_length}

    sorted_list_of_dicts = sorted(
        bcos_list,
        key=lambda d: d['score']['usability_domain_length'],
        reverse=True,
    )

    with open('bco_scores.json', "w") as file:
        json.dump(sorted_list_of_dicts, file, indent=4)


if __name__ == "__main__":
    main()
3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 4, + "fields": { + "action_time": "2024-04-11T15:01:33.676Z", + "user": 7, + "content_type": 12, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.21", + "object_repr": "http://127.0.0.1:8000/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 5, + "fields": { + "action_time": "2024-04-11T20:49:34.241Z", + "user": 7, + "content_type": 12, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.21", + "object_repr": "http://127.0.0.1:8000/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 6, + "fields": { + "action_time": "2024-04-11T20:52:33.448Z", + "user": 7, + "content_type": 12, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.21", + "object_repr": "http://127.0.0.1:8000/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "auth.permission", + "pk": 1, + "fields": { + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" + } + }, + { + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + "pk": 6, + "fields": { + "name": "Can change permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + 
"codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, + "fields": { + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + "fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + "codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { 
+ "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": "Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add blacklisted token", + "content_type": 9, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 
34, + "fields": { + "name": "Can change blacklisted token", + "content_type": 9, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 9, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view blacklisted token", + "content_type": 9, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add group object permission", + "content_type": 10, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change group object permission", + "content_type": 10, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete group object permission", + "content_type": 10, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view group object permission", + "content_type": 10, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add user object permission", + "content_type": 11, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change user object permission", + "content_type": 11, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete user object permission", + "content_type": 11, + "codename": "delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + "name": "Can view user object permission", + "content_type": 11, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add new 
user", + "content_type": 12, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change new user", + "content_type": 12, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete new user", + "content_type": 12, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view new user", + "content_type": 12, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add authentication", + "content_type": 13, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change authentication", + "content_type": 13, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete authentication", + "content_type": 13, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view authentication", + "content_type": 13, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can view BCOs with prefix NOPUB", + "content_type": 13, + "codename": "view_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can add BCOs with prefix NOPUB", + "content_type": 13, + "codename": "add_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can change BCOs with prefix NOPUB", + "content_type": 13, + "codename": "change_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can delete BCOs with prefix NOPUB", + "content_type": 13, + "codename": "delete_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can publish BCOs with prefix NOPUB", + "content_type": 13, + "codename": 
"publish_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can add new user", + "content_type": 11, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can change new user", + "content_type": 11, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can delete new user", + "content_type": 11, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can view new user", + "content_type": 11, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can add authentication", + "content_type": 10, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can change authentication", + "content_type": 10, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can delete authentication", + "content_type": 10, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can view authentication", + "content_type": 10, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can add bco", + "content_type": 12, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can change bco", + "content_type": 12, + "codename": "change_bco" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can delete bco", + "content_type": 12, + "codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + "name": "Can view bco", + "content_type": 12, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": 
"add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + "codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!Bh8Fg1xZLdW7N3SEpDh5IO2PzJZtsMDEqwHeJn5w", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2024-03-14T13:52:22.277Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "tester", + "first_name": "", + "last_name": "", + "email": "tester@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 53 + ] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab", + "first_name": "", + "last_name": "", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe", + 
"first_name": "", + "last_name": "", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", + "last_login": "2024-04-10T22:06:48.972Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "", + "last_name": "", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2024-04-03T10:39:01Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 57, + 53 + ] + } + }, + + { + "model": "sessions.session", + "pk": "0gv8wnnng15dgqxaysg8vs194c96ux2x", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hAcKAWX7j0DGZhBqgaS0q6Md7dNutDtf-_9twi4LiWsnecwkbiIUZx-t4jpyXUH9MB6bzK1usxTlLsiD9rlrRG_rof7d1Cwl632Hny2PCqHenAUWWeljIsKjMpOG8WIvPFEDASG0HqfXAQYEHO0Z_H5AuH2ODg:1rug5g:BEIA7KC4h5LRM3Nqo-163CC0oUR-Fw_QdB3aMV0UAaY", + "expire_date": "2024-04-24T22:06:48.974Z" + } + }, + { + "model": "authtoken.token", + "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "fields": { + "user": 5, + "created": "2024-03-14T15:21:04.318Z" + } + }, + { + "model": "authtoken.token", + "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", + "fields": { + "user": 7, + "created": "2024-04-03T10:53:08.951Z" + } + }, + { + "model": "authtoken.token", + "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "fields": { + "user": 4, + "created": "2024-03-14T15:21:14.996Z" + } + }, + { + "model": "authtoken.token", + "pk": "627626823549f787c3ec763ff687169206626149", + "fields": { + "user": 1, + "created": "2024-03-14T13:53:45.793Z" + } + }, + { + "model": "authtoken.token", + "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "fields": { + "user": 6, + "created": "2024-03-14T15:21:09.348Z" + } + }, + { + "model": "authentication.authentication", + "pk": 1, + "fields": { + 
"username": "bco_api_user", + "auth_service": [ + { + "iss": "Reeya1", + "sub": "ReeyaGupta1" + } + ] + } + }, + { + "model": "authentication.newuser", + "pk": 1, + "fields": { + "email": "test_new_user@testing.com", + "temp_identifier": "sample_temp_identifier", + "token": "token", + "hostname": "http://localhost:8000/", + "created": "2024-03-14T14:28:32Z" + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was 
created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 7, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 23, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"eb8ac2d04b2d3204b88e0bc6e3a66dcfac4af934c1ebe7ce629f8f584d5f3d7a", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2024-04-11T16:44:51.054Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: 
https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + 
"output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": 
"FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-11T16:44:51.054Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + 
}, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + 
"name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + 
"authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"0d692b94bc2528660470e38c78708cc06605f941", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2024-04-04T12:53:33", + "modified": "2024-04-04T12:53:33.679Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [], + "error_domain": {} + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T13:00:38.650Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + 
"affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + 
"name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + 
"authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 3, + "authorized_users": [ + 5 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "bco_api_user", + "state": "DELETE", + "score": 0, + "last_update": "2024-04-04T04:34:54Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000005/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000006/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000007/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "prefix.prefix", + "pk": "BCO", + "fields": { + "certifying_key": "1", + "created": "2024-03-14T13:53:59Z", + "description": "Default prefix for all BioCompute Objects", + "owner": "AnonymousUser", + "counter": 4, + "public": true + } + }, + { + "model": "prefix.prefix", + 
"pk": "NOPUB", + "fields": { + "certifying_key": "", + "created": "2024-03-26T22:22:22Z", + "description": "Test non-public prefix.", + "owner": "bco_api_user", + "counter": 0, + "public": false + } + }, + { + "model": "prefix.prefix", + "pk": "TEST", + "fields": { + "certifying_key": "12345", + "created": "2024-03-14T13:53:59Z", + "description": "Test prefix", + "owner": "tester", + "counter": 7, + "public": true + } + }, + { + "model": "prefix.prefix", + "pk": "HIVE", + "fields": { + "certifying_key": "lazc=!pr35pau5b2x-x70y3_t++d5^s4cgk=mjo5o3^9zvrzki", + "created": "2024-03-14T13:53:59Z", + "description": "Test HIVE prefix", + "owner": "hivelab", + "counter": 0, + "public": true + } + } +] \ No newline at end of file diff --git a/config/fixtures/test_portal.json b/config/fixtures/test_portal.json new file mode 100644 index 00000000..34ca427c --- /dev/null +++ b/config/fixtures/test_portal.json @@ -0,0 +1,4409 @@ +[ + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2024-04-03T11:36:31.006Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "object_repr": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Authorized users\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 2, + "fields": { + "action_time": "2024-04-11T12:43:08.498Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 3, + "fields": { + "action_time": "2024-04-11T13:34:58.678Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "object_repr": 
"https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 4, + "fields": { + "action_time": "2024-04-11T15:01:33.676Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 5, + "fields": { + "action_time": "2024-04-11T20:49:34.241Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "admin.logentry", + "pk": 6, + "fields": { + "action_time": "2024-04-11T20:52:33.448Z", + "user": 7, + "content_type": 12, + "object_id": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "object_repr": "https://test.portal.biochemistry.gwu.edu/TEST_000001/1.21", + "action_flag": 3, + "change_message": "" + } + }, + { + "model": "auth.permission", + "pk": 1, + "fields": { + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" + } + }, + { + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + "pk": 6, + "fields": { + "name": "Can change 
permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + "codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, + "fields": { + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + "fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + 
"codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": "Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + 
"model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add blacklisted token", + "content_type": 9, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 34, + "fields": { + "name": "Can change blacklisted token", + "content_type": 9, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 9, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view blacklisted token", + "content_type": 9, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add group object permission", + "content_type": 10, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change group object permission", + "content_type": 10, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete group object permission", + "content_type": 10, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view group object permission", + "content_type": 10, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add user object permission", + "content_type": 11, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change user object permission", + "content_type": 11, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete user object permission", + "content_type": 11, + "codename": "delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + 
"name": "Can view user object permission", + "content_type": 11, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add new user", + "content_type": 12, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change new user", + "content_type": 12, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete new user", + "content_type": 12, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view new user", + "content_type": 12, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add authentication", + "content_type": 13, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change authentication", + "content_type": 13, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete authentication", + "content_type": 13, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view authentication", + "content_type": 13, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can view BCOs with prefix NOPUB", + "content_type": 13, + "codename": "view_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can add BCOs with prefix NOPUB", + "content_type": 13, + "codename": "add_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can change BCOs with prefix NOPUB", + "content_type": 13, + "codename": "change_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can delete BCOs with prefix NOPUB", + 
"content_type": 13, + "codename": "delete_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can publish BCOs with prefix NOPUB", + "content_type": 13, + "codename": "publish_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can add new user", + "content_type": 11, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can change new user", + "content_type": 11, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can delete new user", + "content_type": 11, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can view new user", + "content_type": 11, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can add authentication", + "content_type": 10, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can change authentication", + "content_type": 10, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can delete authentication", + "content_type": 10, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can view authentication", + "content_type": 10, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can add bco", + "content_type": 12, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can change bco", + "content_type": 12, + "codename": "change_bco" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can delete bco", + "content_type": 12, + "codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + 
"name": "Can view bco", + "content_type": 12, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": "add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + "codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!Bh8Fg1xZLdW7N3SEpDh5IO2PzJZtsMDEqwHeJn5w", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2024-03-14T13:52:22.277Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "tester", + "first_name": "", + "last_name": "", + "email": "tester@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 53 + ] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab", + "first_name": "", + "last_name": "", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, 
+ "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe", + "first_name": "", + "last_name": "", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", + "last_login": "2024-04-10T22:06:48.972Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "", + "last_name": "", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2024-04-03T10:39:01Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 57, + 53 + ] + } + }, + + { + "model": "sessions.session", + "pk": "0gv8wnnng15dgqxaysg8vs194c96ux2x", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hAcKAWX7j0DGZhBqgaS0q6Md7dNutDtf-_9twi4LiWsnecwkbiIUZx-t4jpyXUH9MB6bzK1usxTlLsiD9rlrRG_rof7d1Cwl632Hny2PCqHenAUWWeljIsKjMpOG8WIvPFEDASG0HqfXAQYEHO0Z_H5AuH2ODg:1rug5g:BEIA7KC4h5LRM3Nqo-163CC0oUR-Fw_QdB3aMV0UAaY", + "expire_date": "2024-04-24T22:06:48.974Z" + } + }, + { + "model": "authtoken.token", + "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "fields": { + "user": 5, + "created": "2024-03-14T15:21:04.318Z" + } + }, + { + "model": "authtoken.token", + "pk": "bd97d8cbec1fc7234e11e80957496aefc20c6395", + "fields": { + "user": 7, + "created": "2024-04-03T10:53:08.951Z" + } + }, + { + "model": "authtoken.token", + "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "fields": { + "user": 4, + "created": "2024-03-14T15:21:14.996Z" + } + }, + { + "model": "authtoken.token", + "pk": "627626823549f787c3ec763ff687169206626149", + "fields": { + "user": 1, + "created": "2024-03-14T13:53:45.793Z" + } + }, + { + "model": "authtoken.token", + "pk": 
"3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "fields": { + "user": 6, + "created": "2024-03-14T15:21:09.348Z" + } + }, + { + "model": "authentication.authentication", + "pk": 1, + "fields": { + "username": "bco_api_user", + "auth_service": [ + { + "iss": "Reeya1", + "sub": "ReeyaGupta1" + } + ] + } + }, + { + "model": "authentication.newuser", + "pk": 1, + "fields": { + "email": "test_new_user@testing.com", + "temp_identifier": "sample_temp_identifier", + "token": "token", + "hostname": "http://localhost:8000/", + "created": "2024-03-14T14:28:32Z" + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000000/1.0", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": 
"mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 7, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", 
+ "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 23, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000001/1.0", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "eb8ac2d04b2d3204b88e0bc6e3a66dcfac4af934c1ebe7ce629f8f584d5f3d7a", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2024-04-11T16:44:51.054Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL 
example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + 
"output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": 
"FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-11T16:44:51.054Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000002/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington 
University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + 
"name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + 
"authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/BCO_000004/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000004/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"0d692b94bc2528660470e38c78708cc06605f941", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2024-04-04T12:53:33", + "modified": "2024-04-04T12:53:33.679Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [], + "error_domain": {} + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T13:00:38.650Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" 
+ ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + 
"name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + 
"authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 3, + "authorized_users": [ + 5 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000001/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", 
+ "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "bco_api_user", + "state": "DELETE", + "score": 0, + "last_update": "2024-04-04T04:34:54Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000002/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. 
The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000004/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000005/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000006/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "https://test.portal.biochemistry.gwu.edu/TEST_000007/DRAFT", + "fields": { + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "prefix.prefix", + "pk": "BCO", + "fields": { + "certifying_key": "1", + "created": "2024-03-14T13:53:59Z", + "description": "Default prefix for all BioCompute Objects", + "owner": "AnonymousUser", + "counter": 4, + "public": true + } + }, + { + "model": "prefix.prefix", + 
def response_status(accepted_requests: bool, rejected_requests: bool) -> int:
    """Determine Response Status

    Determines the appropriate HTTP response status code for a bulk request
    from two flags describing the outcome of its individual items.

    Parameters:
    - accepted_requests (bool):
        Flag indicating whether any requests have been accepted.
    - rejected_requests (bool):
        Flag indicating whether any requests have been rejected.

    Returns:
    - int: The HTTP status code representing the outcome. Possible values are:
        - status.HTTP_400_BAD_REQUEST (400) if requests were rejected and
          none were accepted.
        - status.HTTP_207_MULTI_STATUS (207) if there is a mix of accepted
          and rejected requests.
        - status.HTTP_200_OK (200) if nothing was rejected. This includes
          the case where neither flag is set (e.g. an empty batch), which
          previously raised UnboundLocalError because no branch assigned a
          status code.
    """

    # `status` is the module imported from rest_framework at the top of
    # config/services.py. The flags are evaluated by truthiness so that every
    # combination maps to exactly one return value.
    if rejected_requests:
        if accepted_requests:
            return status.HTTP_207_MULTI_STATUS
        return status.HTTP_400_BAD_REQUEST

    return status.HTTP_200_OK


def legacy_api_converter(data: dict) -> list:
    """Legacy API converter

    Used to remove the `POST_` object from requests: the value stored under
    the most recently inserted key of `data` is extracted and returned.
    Prefix APIs and "draft_publish" APIs require a little more cleaning.

    The caller's dictionary is left untouched.

    Parameters:
    - data (dict):
        The legacy request payload. The value under its last key is indexed
        with `[0]`, so it is expected to be a list of objects.

    Returns:
    - list: The unwrapped request objects.
        - If the first object has a "draft_id" key, each object is reduced to
          "object_id" (taken from "draft_id"), "delete_draft" (default
          False) and, when the object has an "object_id" key,
          "published_object_id".
        - If the first object has a "prefixes" key, the "prefixes" lists of
          all objects are flattened into dictionaries holding only "prefix"
          and "description".
        - Otherwise the extracted value is returned unchanged.

    Raises:
    - KeyError: If `data` is empty.
    """

    if not data:
        raise KeyError("legacy_api_converter(): request payload is empty")

    # Same item dict.popitem() would return (the last one inserted), but read
    # without removing it from the caller's dictionary.
    new_data = list(data.values())[-1]

    # An empty list has no first element to inspect; there is nothing to
    # convert, so hand it back as is instead of raising IndexError.
    if not new_data:
        return new_data

    first_entry = new_data[0]

    if "draft_id" in first_entry:
        return_data = []
        for entry in new_data:
            converted = {"object_id": entry["draft_id"]}
            if "object_id" in entry:
                converted["published_object_id"] = entry["object_id"]
            converted["delete_draft"] = entry.get("delete_draft", False)
            return_data.append(converted)
        return return_data

    if "prefixes" in first_entry:
        # Only the prefix name and description are carried over.
        return [
            {
                "prefix": prefix["prefix"],
                "description": prefix["description"],
            }
            for entry in new_data
            for prefix in entry["prefixes"]
        ]

    return new_data


def response_constructor(
    identifier: str,
    status: str,
    code: str,
    message: str = None,
    data: dict = None,
) -> dict:
    """Constructs a structured response dictionary.

    This function creates a standardized response object for API responses.
    The result is a flat dictionary holding the identifier, the request
    status, the status code, and, only when they are provided, the data and
    the message.

    Parameters:
    - identifier (str):
        A unique identifier for the response object. Stored under the
        "identifier" key.
    - status (str):
        The request status (e.g., 'success', 'error') indicating the outcome
        of the operation. Stored under the "request_status" key. Inside this
        function the name shadows the module-level `status` import.
    - code (str):
        The HTTP status code representing the result of the operation.
        Stored under the "status_code" key.
    - message (str, optional):
        An optional message providing additional information about the
        response or the result of the operation. Default is None, in which
        case no "message" key is added.
    - data (dict, optional):
        An optional dictionary containing any data that should be returned in
        the response. Default is None, in which case no "data" key is added.

    Returns:
    - dict: The assembled response object.
    """

    response_object = {
        "identifier": identifier,
        "request_status": status,
        "status_code": code,
    }

    if data is not None:
        response_object["data"] = data
    if message is not None:
        response_object["message"] = message

    return response_object
+PUBLIC_HOSTNAME = secrets["SERVER"]["PUBLIC_HOSTNAME"] + + +CORS_ORIGIN_ALLOW_ALL = True +CORS_ORIGIN_WHITELIST = ["*"] + +# Use the REST framework +REST_FRAMEWORK = { + "DEFAULT_AUTHENTICATION_CLASSES": [ + 'authentication.services.CustomJSONWebTokenAuthentication', + "rest_framework.authentication.TokenAuthentication", + 'rest_framework.authentication.SessionAuthentication', + 'rest_framework.authentication.BasicAuthentication', + ], + "DEFAULT_PERMISSION_CLASSES": ["rest_framework.permissions.IsAuthenticated"], + "DEFAULT_SCHEMA_CLASS": "rest_framework.schemas.coreapi.AutoSchema", +} + +JWT_AUTH = { + "JWT_RESPONSE_PAYLOAD_HANDLER": "authentication.services.CustomJSONWebTokenAuthentication", + "JWT_EXPIRATION_DELTA": timedelta(seconds=604800), + "JWT_REFRESH_EXPIRATION_DELTA": timedelta(days=14), + "JWT_ALLOW_REFRESH": True, +} + +# Password validation +# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + +AUTHENTICATION_BACKENDS = [ + "django.contrib.auth.backends.ModelBackend", +] + +# --- APPLICATION --- # +# Application definition + +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.admindocs", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "drf_yasg", + "rest_framework", + "rest_framework.authtoken", + 'rest_framework_jwt', + 'rest_framework_jwt.blacklist', + "rest_framework_swagger", + "reset_migrations", + "authentication", + "biocompute", + "prefix" +] + +# Source: https://dzone.com/articles/how-to-fix-django-cors-error +MIDDLEWARE 
= [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "corsheaders.middleware.CorsMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "config.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +SWAGGER_SETTINGS = { + "SECURITY_DEFINITIONS": { + "Bearer": {"type": "apiKey", "name": "Authorization", "in": "header"} + }, + "DEEP_LINKING": True, +} + +REDOC_SETTINGS = {"LAZY_RENDERING": False} + +WSGI_APPLICATION = "config.wsgi.application" + +# Database +# https://docs.djangoproject.com/en/3.0/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": secrets["SERVER"]["DATABASE"], + } +} + +# Internationalization +# https://docs.djangoproject.com/en/3.0/topics/i18n/ + +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" + +USE_I18N = True + +USE_L10N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/3.0/howto/static-files/ + +STATIC_URL = "/api/static/" +STATICFILES_DIRS = [os.path.join(BASE_DIR, 'static')] +# STATIC_ROOT = "/var/www/bcoeditor/bco_api/bco_api/static/" + +# ----- CUSTOM VARIABLES AND METHODS ----- # +# Load request and validation templates (definitions). 
+# Note that we will get TWO loads of settings.py if we start without runserver --noreload +# There is only set of definitions for requests, but for validations, we may have sub-folders. +# First, the request definitions. + +# Make the object naming accessible as a dictionary. + +# emailing notifications +EMAIL_BACKEND = secrets["SERVER"]["EMAIL_BACKEND"] +EMAIL_HOST = "localhost" +EMAIL_PORT = 25 +DEFAULT_AUTO_FIELD = "django.db.models.AutoField" diff --git a/config/urls.py b/config/urls.py new file mode 100755 index 00000000..fc7add75 --- /dev/null +++ b/config/urls.py @@ -0,0 +1,58 @@ +"""URL Configuration + +Top level URL configuration for BCO DB. See `api.urls` for APIs +""" +import configparser +from django.conf import settings +from django.contrib import admin +from django.urls import path, include, re_path +from drf_yasg.views import get_schema_view +from drf_yasg import openapi +from rest_framework import permissions +from biocompute.apis import DraftRetrieveApi, PublishedRetrieveApi + +VERSION = settings.VERSION + +ShcemaView = get_schema_view( + openapi.Info( + title="BioCompute Object Data Base API (BCODB API)", + default_version=VERSION, + description="A web application that can be used to create, store and " + "edit BioCompute objects based on BioCompute schema described " + "in the BCO specification document.", + terms_of_service="https://github.com/biocompute-objects/bco_api/blob/master/LICENSE", + contact=openapi.Contact(email="object.biocompute@gmail.com"), + license=openapi.License(name="MIT License"), + ), + public=True, + permission_classes=(permissions.AllowAny,), +) + +urlpatterns = [ + re_path( + r"^api/doc(?P\.json|\.yaml)$", + ShcemaView.without_ui(cache_timeout=0), + name="schema-json", + ), + path( + "api/docs/", + ShcemaView.with_ui("swagger", cache_timeout=0), + name="schema-swagger-ui", + ), + path( + "api/redocs/", + ShcemaView.with_ui("redoc", cache_timeout=0), + name="schema-redoc", + ), + path("api/admin/", admin.site.urls), + 
path("api/", include("authentication.urls")), + path("api/", include("search.urls")), + path("api/", include("biocompute.urls")), + path("api/", include("prefix.urls")), + path("/DRAFT", DraftRetrieveApi.as_view()), + path( + "/", + PublishedRetrieveApi.as_view() + ), + # path("", ObjectIdRootObjectId.as_view()), +] diff --git a/bcodb/wsgi.py b/config/wsgi.py similarity index 82% rename from bcodb/wsgi.py rename to config/wsgi.py index fb46c2c9..8209768a 100755 --- a/bcodb/wsgi.py +++ b/config/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bcodb.settings") +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings") application = get_wsgi_application() diff --git a/docs/bco_scores.json b/docs/bco_scores.json new file mode 100644 index 00000000..9705f004 --- /dev/null +++ b/docs/bco_scores.json @@ -0,0 +1,13216 @@ +[ + { + "object_id": "https://biocomputeobject.org/BCO_000283/1.0", + "usability_domain": [ + "Until the present day, the majority of cancer genomic have been focusing on identifying the tumor gene and the pathways involving in tumor development. Despite of this being a huge success, there is still a little knowledge of why cancer patients with similar cancer driver genes may result in different disease outcome and/or drug responses. Therefore, there is a need of understanding deeper into this problem. First, it is essential to identify the genes that cause the cancer, or the cancer-driver genes. Therefore, the completion of The Cancer Genome Atlas (TCGA) and other large scale genomic project are important because those projects will provide a critical and essential data to identify driver mutation that lead to cancer. Although it a fast-growing project, the TCGA is far from completed because of the mutation diversity and it is difficult to increase the size of the datasets. 
A complementary approach toward that goal is to integrate cancer mutation profiles and increase the statistical power of analysis. Moreover, information on the structure of the proteins coded by genes is checked to see the enrichment in cancer mutation in specific regions.", + "In this publication, the authors introduce the extension of an e-Driver that use information on three-dimensional structures of the mutated proteins to identify specific structures. The algorithm analyzed if these structural feature are enriched in cancer somatic mutation and can become candidates of cancer-driver genes. The authors specifically pay attention on the protein-protein interaction (PPI) interfaces since a lot of cancer-driver genes are located in the important region of the PPI network. Here, the authors identified PPi interfaces in a total of 103 genes. 32 of these are well-known cancer-driver genes. The function of the remaining 71 still have to be verified experimentally. The authors also showed that depending on which interface or protein region is altered, tumors apparently driven by the same cancer gene may have different outcomes. ", + "The result assembling a data set of 5,989 tumors from 23 cancer types from the TCGA showed that the average number of missense mutation per sample is highly variable among cancer types, with melanoma is the highest (429 mutations per sample) and thyroid carcinoma is the lowest (11 mutations per sample). Mutations from 868,508 cancer datasets are distributed randomly across the proteome with 30% of mutations in structures and 6% in PPI interfaces (https://doi.org/10.1371/journal.pcbi.1004518.g001). In 103 interface driver genes in the Pan-cancer analysis, there is a huge overlap between the genes identified in this analysis and lists of the known cancer genes (https://doi.org/10.1371/journal.pcbi.1004518.g002) (https://doi.org/10.1371/journal.pcbi.1004518.g003). 
The result also emphasizes the differences and similarities across related driver genes. The analysis of 71 interface driver genes that are identified as not cancer-driver genes determine their potential roles in cancer. Furthermore, they also have function which is related to immunity", + "The raw data and the algorithm can be download from (http://github.com/eduardporta/e-Driver). The 3 level mutation data can be download from TCGA portal (https://tcga-data.nci.nih.gov) for 5,989 tumor samples that belongs to 23 different cancer types. 18,651 protein structures were identified from PDB (2014) for the analysis of residues implicated with PPI interfaces. The complete dataset containing PPI structures and models are from Interactome3D. e-Driver is used to identify interfaces that are enriched in somatic missense mutations,", + "This is the first time that 3D PPI interfaces have been used to identify genes across large cancer datasets. The analysis showed that cancer driver genes, such as TP35, HRAS, PIK3CA or EGFR can find relevant genes and interaction interfaces alteration is a common pathogenic mechanism of cancer somatic mutation. The authors also found that tumors with mutations in the same driver gene can have different behavior and outcomes, depend on the PPI interface affected by the mutation. This research has focused on the analysis and interpretation of of missense mutation. However, there are other types of variations that still can act as cancer drivers and have a huge impact on the outcomes of the patients.", + "There is no parameter that need to be changed" + ], + "score": { + "usability_domain_length": 4381 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000280/0.1.0", + "usability_domain": [ + "Tempora is a tool for creating cell lineage trajectories using single cell RNA sequencing (scRNA-seq) data. Many tools exist for creating lineage trajectories (Monocle, TSCAN, Slingshot, etc) along a so called pseudotemporal scale. 
The scale is arranged using a variety of methods such as using a minimum spanning tree (MST) method that arranges cell clusters along an axis according to their transcriptome similarity, determined from the scRNA-seq data, such that the expression differences (\u201ctime\u201d) between all clusters is as small as possible. This and other methods solidly assume that the differences in gene expression are correlated to where along the time-path of a particular lineage each cell exists. Tempora partially removes this assumption by incorporating temporal data collected during time-course scRNA-seq experiments. Tempora\u2019s novel innovation over existing time course tools like Waddinton-OT and CSHMM is creating enriched, redundancy reduced, cluster-level pathways that use time series data to predict temporal directions between cell types. Fields like immunology and developmental biology are very interested in determining cell lineages and predicting trajectories to determine the systemic differences (and manipulating those systems) between progenitors and differentiated cells. Tempora describes initial scRNA-seq data clean-up procedures and two workflows: generating cell lineage trajectories and determining temporally related pathways. This BCO will focus on the cell lineage trajectory workflow using a human skeletal muscle myoblast dataset. ", + "Paper: https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008205#sec011", + "GitRepository: https://github.com/BaderLab/Tempora", + "Condensed Worflow: Tempora takes cleaned and annotated scRNA-seq data and uses gene set variation analysis (GSVA) to create enriched pathway profiles for each predetermined cell-type cluster. These profiles then undergo principle component analysis to select the most relevant pathways for each cluster. The reduced profiles are then used to construct a mutual information network between cell clusters. 
Each cell cluster is then assigned a temporal score according to its cell composition from each time point in the data\u2019s series. The temporal scores are then used to assign direction (from earlier to later) to the edges assigned to each cluster. The final output is viewed as an inferred lineage trajectory map with cell clusters as nodes connected by arrows representing the flow of time in cell lineage.Condensed Worflow: Tempora takes cleaned and annotated scRNA-seq data and uses gene set variation analysis (GSVA) to create enriched pathway profiles for each predetermined cell-type cluster. These profiles then undergo principle component analysis to select the most relevant pathways for each cluster. The reduced profiles are then used to construct a mutual information network between cell clusters. Each cell cluster is then assigned a temporal score according to its cell composition from each time point in the data\u2019s series. The temporal scores are then used to assign direction (from earlier to later) to the edges assigned to each cluster. The final output is viewed as an inferred lineage trajectory map with cell clusters as nodes connected by arrows representing the flow of time in cell lineage." + ], + "score": { + "usability_domain_length": 3342 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000517/v-2.0.2", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Sites (MCW) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. 
The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The current dataset has 4997 proteins with O-GlcNAc modifications. Out of which 3262 proteins have no site information where as 1735 proteins have 6722 sites.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Human O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s human protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to human species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding human UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. 
Interestingly, a total of 2,237 proteins had an O-GlcNAc score of 3 or under, meaning that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation.", + "The log file - https://data.glygen.org/ln2data/releases/data/v-1.12.3/logs/human_proteoform_glycosylation_sites_o_glcnac_mcw.log contains the entries that were excluded from the output file along with the reason for exclusion.", + "The orginal unprocessed dataset in excel format can be found at https://figshare.com/articles/The_human_O-GlcNAcome_database/12443495/4" + ], + "score": { + "usability_domain_length": 3011 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000517/v-2.1.1", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Sites (MCW) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The current dataset has 4997 proteins with O-GlcNAc modifications. Out of which 3262 proteins have no site information where as 1735 proteins have 6722 sites.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Human O-GlcNAcome Database and Meta-Analysis. 
Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s human protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to human species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding human UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a total of 2,237 proteins had an O-GlcNAc score of 3 or under, meaning that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. 
Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation.", + "The log file - https://data.glygen.org/ln2data/releases/data/v-1.12.3/logs/human_proteoform_glycosylation_sites_o_glcnac_mcw.log contains the entries that were excluded from the output file along with the reason for exclusion.", + "The orginal unprocessed dataset in excel format can be found at https://figshare.com/articles/The_human_O-GlcNAcome_database/12443495/4" + ], + "score": { + "usability_domain_length": 3011 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000325/3.0", + "usability_domain": [ + "A virus genome was used as a basis for developing diagnostic tests using polymerase chain reactions (PCR), identifying circulating strains, discovering mutations that could alter transmission rates, pathogenicity, drug research, culminating in vaccine development (Mercer & Salit, 2021).", + "There are two main methods of assembling the genome: reference-based and de novo assembly. The reference-based assembly method is used when the genome of the target organism is available, and the reads are aligned to the reference. In this approach, the focus is on Accepting indels and multiple nucleotides in the structure and gene variant information (Chen et al., 2017). In de novo assembly, consensus sequences are generated without reference genomes by using heuristics to maintain single/ multiple nucleotide variants and indels (Li, 2012).", + "Most of the pipelines developed for the genome assembly of SARS-CoV-2 follow a Reference-based strategies, including Viralrecon, V-pipe, SIGNAL, and CLC. 
These are only reference-based assemblies and may miss critical information regarding multiple nucleotide variants or indels since they are forced to have the same structure as the reference.", + "However, de novo assemblies are aimed at capturing the natural structure By maintaining any indel or multiple nucleotide variants found in the sequences. Up until now, there has been no published pipeline for combining reference-based and a de novo assembly strategy for the SARS-CoV-2 genome. Combining these strategies would be beneficial a virus sample could be analyzed to gather valuable and reliable information since would recover natural genome information such as indels and multiple nucleotide variants utilize the reference genome for guiding and organizing the de novo assembly as well sequences.", + "Pipcov combines both the advantages of de novo and reference-based assembly strategies. It provides assembly for variant identification of SARS-CoV-2 viruses", + "PipeCoV got smaller and more variable values for the consensus length (average of 29,754 bp) as it combines do novo and reference-based strategies for the assembly. About genome coverage, PipeCov showed an average genome coverage of 97.01%.", + "Another important quality metric is number of N\u2019s, in high-quality assemblies must be less than 1% of the total consensus length and the length of the consensus must be greater than 29,000 bp Briones et al. (2020) PipeCoV generated three consensus sequences with the length between 29.000 bp and 28.837 bp and 64 consensus sequences with more than 300 N\u2019s, that delivers a high quality consensus compared to other pipelines. PipeCoV benchmarked with those 120 paired-end datasets. 
All data relating to the 120 sequenced samples can be found in the https://www.ncbi.nlm.nih.gov/ pmc/articles/PMC9013232/bin/peerj-10-13300-s001.xlsx PipecoV workflow https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9013232/figure/ fig-1/" + ], + "score": { + "usability_domain_length": 2899 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000633/v-2.0.2", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Sites (MCW) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Fruitfly O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s fruitfly protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to fruitfly species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding fruitfly UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. 
Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." + ], + "score": { + "usability_domain_length": 2476 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000633/v-2.1.1", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Sites (MCW) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Fruitfly O-GlcNAcome Database and Meta-Analysis. 
Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s fruitfly protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to fruitfly species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding fruitfly UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." 
+ ], + "score": { + "usability_domain_length": 2476 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000631/v-2.0.2", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Sites (MCW) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s mouse protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to mouse species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding mouse UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. 
The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." + ], + "score": { + "usability_domain_length": 2460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000631/v-2.1.1", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Sites (MCW) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. 
Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s mouse protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to mouse species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding mouse UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." 
+ ], + "score": { + "usability_domain_length": 2460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000632/v-2.0.2", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Sites (MCW) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s rat protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to rat species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding rat UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. 
The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." + ], + "score": { + "usability_domain_length": 2449 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000632/v-2.1.1", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Sites (MCW) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", + "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", + "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. 
Scientific Data 8, 25 (2021) ", + "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s rat protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to rat species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding rat UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", + "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." + ], + "score": { + "usability_domain_length": 2449 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000277/4.0", + "usability_domain": [ + "Olduvai protein domains (formerly \"DUF1220\") are the most duplicated protein coding sequence in the human genome (https://doi.org/10.1371/journal.pbio.0020207). 
They are expressed in many tissues, including strongly in the brain (https://doi.org/10.1126/science.1127980). The copy number of Olduvai domains has been linked to increased brain size (https://doi.org/10.1007/s00429-014-0814-9), and performance on IQ tests (https://doi.org/10.1007/s00439-014-1489-2), as well as neurodiverse states like autism (https://doi.org/10.1371/journal.pgen.1004241).", + "Precise evaluation of copy number in humans has been difficult to achieve, as ratiometric approaches fail to identify small changes when the total number is the ~300 range, as in humans. Read depth approaches using short read WGS data are promising, but most existing pipelines mask repeats altogether, and those that do measure copy number do it relative to the gene. However, Olduvai domains are known to exist in different number and kind within a family of genes, rendering this approach inoperative. The pipeline described here is used to identify the copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods, designed with DUF1220 copy number in mind.", + "Approximately 25 individuals were randomly chosen from each of the American (Utah -- Northern and Western European ancestry; CEU), Nigerian (Yoruba; YRI), Han Chinese (Beijing; CHB), Japanese (Tokyo; JPT), Mexican-American (Los Angeles; MXL), Colombian (Medellin; CLM), Puerto Rican (Puerto Rico; PUR), African-American (Southwest US; ASW), Luhya (Webuye, Kenya; LWK), Han Chinese (South China; CHS), Tuscan (Toscana, Italia; TSI), Spanish (Iberian populations; IBS), Finnish (Finland; FIN), and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. 
No intermediate files were generated because the commands were run executed as a pipe at the command line, so T:/dev/tmpfs was used for the file IOs in the Description Domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" + ], + "score": { + "usability_domain_length": 2287 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000452/1.0", + "usability_domain": [ + "Olduvai protein domains (formerly \"DUF1220\") are the most duplicated protein coding sequence in the human genome (https://doi.org/10.1371/journal.pbio.0020207). They are expressed in many tissues, including strongly in the brain (https://doi.org/10.1126/science.1127980). The copy number of Olduvai domains has been linked to increased brain size (https://doi.org/10.1007/s00429-014-0814-9), and performance on IQ tests (https://doi.org/10.1007/s00439-014-1489-2), as well as neurodiverse states like autism (https://doi.org/10.1371/journal.pgen.1004241).", + "Precise evaluation of copy number in humans has been difficult to achieve, as ratiometric approaches fail to identify small changes when the total number is the ~300 range, as in humans. Read depth approaches using short read WGS data are promising, but most existing pipelines mask repeats altogether, and those that do measure copy number do it relative to the gene. However, Olduvai domains are known to exist in different number and kind within a family of genes, rendering this approach inoperative. 
The pipeline described here is used to identify the copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods, designed with DUF1220 copy number in mind.", + "Approximately 25 individuals were randomly chosen from each of the American (Utah -- Northern and Western European ancestry; CEU), Nigerian (Yoruba; YRI), Han Chinese (Beijing; CHB), Japanese (Tokyo; JPT), Mexican-American (Los Angeles; MXL), Colombian (Medellin; CLM), Puerto Rican (Puerto Rico; PUR), African-American (Southwest US; ASW), Luhya (Webuye, Kenya; LWK), Han Chinese (South China; CHS), Tuscan (Toscana, Italia; TSI), Spanish (Iberian populations; IBS), Finnish (Finland; FIN), and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. No intermediate files were generated because the commands were run executed as a pipe at the command line, so T:/dev/tmpfs was used for the file IOs in the Description Domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" + ], + "score": { + "usability_domain_length": 2287 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000301/1.0", + "usability_domain": [ + "Astrocytes are the most abundant glial cells in the brain. Over the past few decades, the classical view of astrocytes being merely supportive cells in the nervous system has dramatically extended to include the important roles of astrocytes in neuronal, blood vessel and glial function (https://doi.org/10.1016/j.tcb.2016.01.003). Astrocytes are known to respond to neurotransmitters released from neurons by increasing intracellular calcium. 
These calcium events can further propagate to neighboring cells through gap junctions, causing the release of gliotransmitters which bind to neuronal receptors and eventually facilitate synaptic transmission (https://doi.org/10.1016/j.neuron.2014.02.007). This crosstalk between astrocytes and neurons via calcium signaling indicates the active role of astrocytes in neural signaling and synaptic plasticity (DOI: 10.1109/TMBMC.2022.3142621). ", + "Given that astrocyte calcium signaling is critical to healthy regulation of neuronal activity and physiology, it is imperative to be able to study intercellular communication in astrocytes at the network level. While there are existing methods for analyzing calcium events in astrocytes, these tools are limited to evaluating calcium signaling at an individual cellular level. Additionally, these tools are often based on using pre-defined regions of interest (ROIs), which may offer bias in analysis. ", + "Astral is a novel tool that allows for analysis of the functional interactions between astrocytes mediated by their calcium signaling at the network level. Its unique pipeline quantifies astrocytic calcium events without the need for pre-defined ROIs, and analyzes intercellular propagation based on live-cell imaging. Astral consists of a core-processing pipeline for detection and quantification of Ca2+ events, as well as a visualization tool for data quality control. Altogether, Astral is powerful and novel tool that enables a novel approach in studying astrocyte-neuronal interactions at the network level ", + "https://doi.org/10.3389/fncel.2021.689268", + "This BCO provides information on Astral's core processing pipeline for the detection and quantification of calcium events (not the second-part visualization tool for data quality control)." 
+ ], + "score": { + "usability_domain_length": 2232 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000088/1.0.36", + "usability_domain": [ + " A0392 Gene expression signature profile (measured by calculating the cosinor correlation of the sample expression profile to a template, (the mean expression profile of 44 tumors with a known good clinical outcome), and by determining the 70-gene molecular profile of the sample (Low Risk, High Risk) for breast cancer recurrence. The genes are AKAP2 (UPKB:Q9Y2D5), AP2B1 (UPKB:P63010), BBC3 (UPKB:Q9BXHI), CCN4 (UPKB:O95388), Clone HQ0310 PRO0310p1 (HQ0310), COL4A2 (UPKB:P08572), CMC2 (UPKB:Q9NRP2), ALDH4A1 (UPKB:P30038), DTL (UPKB:Q9NZJ0), DCK (UPKB:P27707), MCM6 (UPKB:Q14566), ESM1 (UPKB:Q9NQ30), ECI2 (UPKB:O75521), EXT1 (UPKB:Q16394), Expressed sequence tag (EST6), Expressed sequence tag (EST7), Expressed sequence tag (EST8), Expressed sequence tag (EST9), Expressed sequence tag (EST10), Expressed sequence tag (EST11), Expressed sequence tag (EST12), Expressed sequence tag (EST13), Expressed sequence tag (EST14), Expressed sequence tag (EST15), Expressed sequence tag (EST1), Expressed sequence tag (EST3), Expressed sequence tag (EST4), Expressed sequence tag (EST5), Expressed sequence tag (EST2), FGF18 (UPKB:O76093), CCNE2 (UPKB:O96020), GSTPK, GSTM3 (UPKB:P21266), GMPS (UPKB:P49915), GNAZ (UPKB:P19086), HEC, CENPA (UPKB:P49450), Homo sapiens mRNA cDNA DKFZp434C0931 (from clone DKFZp434C0931) partial cds (DKFZp434C0931mRNA, Hypothetical protein DKFZp564D0462 (DKFZp564D0462), Hypothetical protein FLJ11190 (FLJ11190), Hypothetical protein FLJ11354 (FLJ11354), Hypothetical protein FLJ12443 (FLJ12443), Hypothetical protein FLJ22477 (FLJ22477), IGFBP5 (UPKB:P24593), MELK (UPKB:Q14680), MMP9 (UPKB:P14780), CFFM4 (UPKB:Q9GZW8), MP1, NMU (UPKB:P48645), ORC6 (UPKB:Q9Y5N6), PLAAT3 (UPKB:P53816), ECT2 (UPKB:Q9H8V3), PRC1 (UPKB:O43663), RAB6B (UPKB:Q9NRW1), RFC4 (UPKB:P35249), Ser-Thr protein kinase related to the 
myotonic dystrophy protein kinase, SCUBE2 (UPKB:Q9NQ36), SM20, SERF1A (UPKB:O75920), SLC2A3 (UPKB:P11169), OXCT1 (UPKB:P55809), TSPYL5 (UPKB:Q86VY4), TMEFF1 (UPKB:Q8IYR6), EBF4 (UPKB:Q9BQW3), TGFB3 (UPKB:P10600), UCHL5 (UPKB:Q9Y5K5), FLT1 (UPKB:P17948). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 2145 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000303/3.0", + "usability_domain": [ + "Apoptosis is an important process for maintaining homeostasis, eliminating cells that have been damaged beyond repair, and preventing cancer. If there is a problem with apoptosis, it could lead to uncontrolled cell proliferation and development of a tumor (https://pubmed.ncbi.nlm.nih.gov/18846107/).", + "Proteins of the B-cell lymphoma-2 (Bcl-2) family are known to regulate programmed cell death through the mitochondrial apoptosis pathway. They are therefore considered pro-survival proteins given that a major characteristic of cancer is its ability to avoid programmed cell death (https://pubmed.ncbi.nlm.nih.gov/10398099/). This makes them a therapeutic target for drug discovery (https://pubmed.ncbi.nlm.nih.gov/28735187/). When there is intracellular stress, the apoptotic homeostasis is controlled by the activator and sensitizer BH3 (Bcl-2 homology 2)-only proteins. Changes in expression levels of these proteins can lead to an imbalance. More specifically, overexpression can cause oncogenic effects (https://pubmed.ncbi.nlm.nih.gov/24355989/). Normally, the Bcl-2 family proteins will bind to the BH3 motif of pro-apoptotic proteins to create a network of protein-protein interactions, but any dysfunction will result in the cancer cells to evade cell death. This study emphasizes the importance of future experimental research on the protein network between Bcl-2 and BH3 within a breast cancer context.", + "Bioinformatic approaches were done in order to link -omics with structural data. 
There was no change in the parameters from the default. This BCO represents the steps taken to identify protein interaction partners of the Bcl-2 family members that contain the BH3 motif in breast cancer samples. This was done by first retrieving the experimentally known Bcl-2 family interactions from the human Integrated Interaction Database (IID), and then the interaction list was filtered to only include those proteins containing the BH3 motif. Out of the 560 protein-protein interactions that were collected, 295 of them were selected as possible BH3-containing proteins. Out of the 295 proteins, 282 were identified as BH-3 only." + ], + "score": { + "usability_domain_length": 2132 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000059/1.0", + "usability_domain": [ + "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).\nThe current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.\nBioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.\nThe dataset is imported and contributed by BioMuta and processed by GlyGen. 
If you use this dataset please provide proper attribution to BioMuta and GlyGen.\nNote the mutation shown in GlyGen are annotated based on passing of one or more filter criteria.\n- Filter criteria 1 - Positive Patient's Frequency Ratio \\u2265 1.0%,\n- Filter criteria 2 - Presence of mutation in number of cancer types \\u2265 3 cancer types,\n- Filter criteria 3 - Positive Patient's count \\u2265 10 patients,\n- Filter criteria 4 - Mutation reported and annotated in number of databases,\n- Filter criteria 5 - Mutation mined from the literature using literature mining tools (human_protein_mutation_literature.csv),\nIf there is also a germline mutation reported in dbSNP, the rsid is also shown\nThese filter criteria annotations are shown on the front end under the column Annotation Name\nAlso if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.\nWhen mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen,\nTwo different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.\nOn the GlyGen interface, in Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amino acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." 
+ ], + "score": { + "usability_domain_length": 2003 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000004/v-2.0.2", + "usability_domain": [ + "The Human Glycosyltransferases dataset contains list of human [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. The dataset was created by: Proteomes, gene and protein lists were retrieved from UniProtKB/Swiss-Prot. The full set of proteins comprising the human proteome was retrieved with the following search terms: keyword:\"Complete proteome [KW-0181]\" (now removed from UniProtKB) AND reviewed:yes AND organism:\"Homo sapiens (Human) [9606]\". To retrieve the list of all human glycosyltransferases, the full list of human proteins were filtered using Gene Ontology (GO) terms, with molecular function GO term, GO:0016757 transferase activity, transferring glycosyl groups, each corresponding to a unique gene. To ensure the gene list was comprehensive with respect to available annotations, entries were cross-referenced with two external glycomics databases: the Carbohydrate Active Enzymes database (CAZY, http://www.cazy.org/), the Consortium for Functional Glycomics (CFG) functional glycomics gateway (http://www.functionalglycomics.org/), BRENDA (https://www.brenda-enzymes.org/index.php) and Enzyme Portal (https://www.ebi.ac.uk/enzymeportal/). The list was then filtered to remove any duplicate references resulting from cross-database redundancy or secondary accessions. Entries not already contained in the GO-derived glycosyltransferase gene list were added to the list. InterPro and Pfam accessions were then retrieved for all genes currently in the list. 
For GlyGen, GTs that were not part of glycan metabolism were removed to produce a manually curated list of glycosyltransferase. If you use this dataset please provide proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 1985 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000004/v-2.1.1", + "usability_domain": [ + "The Human Glycosyltransferases dataset contains list of human [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. The dataset was created by: Proteomes, gene and protein lists were retrieved from UniProtKB/Swiss-Prot. The full set of proteins comprising the human proteome was retrieved with the following search terms: keyword:\"Complete proteome [KW-0181]\" (now removed from UniProtKB) AND reviewed:yes AND organism:\"Homo sapiens (Human) [9606]\". To retrieve the list of all human glycosyltransferases, the full list of human proteins were filtered using Gene Ontology (GO) terms, with molecular function GO term, GO:0016757 transferase activity, transferring glycosyl groups, each corresponding to a unique gene. To ensure the gene list was comprehensive with respect to available annotations, entries were cross-referenced with two external glycomics databases: the Carbohydrate Active Enzymes database (CAZY, http://www.cazy.org/), the Consortium for Functional Glycomics (CFG) functional glycomics gateway (http://www.functionalglycomics.org/), BRENDA (https://www.brenda-enzymes.org/index.php) and Enzyme Portal (https://www.ebi.ac.uk/enzymeportal/). The list was then filtered to remove any duplicate references resulting from cross-database redundancy or secondary accessions. 
Entries not already contained in the GO-derived glycosyltransferase gene list were added to the list. InterPro and Pfam accessions were then retrieved for all genes currently in the list. For GlyGen, GTs that were not part of glycan metabolism were removed to produce a manually curated list of glycosyltransferase. If you use this dataset please provide proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 1985 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000024/v-2.0.2", + "usability_domain": [ + "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).", + "The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.", + "BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.", + "The dataset is imported and contributed by BioMuta and processed by GlyGen. If you use this dataset please provide proper attribution to BioMuta and GlyGen. 
", + "Note the mutation shown in GlyGen are annotated based on passing of one or more filter criterias.", + "Filter criteria 1 - Positive Patient's Frequency Ratio \u2265 1.0%", + "Filter criteria 2 - Presence of mutation in number of cancer types \u2265 3 cancer types", + "Filter criteria 3 - Positive Patient's count \u2265 10 patients", + "Filter criteria 4 - Mutation reported and annotated in number of databases", + "Filter criteria 5 - Mutation minned from the literature using literaure mining tools (human_protein_mutation_literature.csv)", + "If there is also a germline mutation reported in dbSNP, the rsid is also shown", + "These filter criteria annotations are shown on the front end under the column Annotation Name", + "Also if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.", + "When mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen", + "Two different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.", + "On the GlyGen interface, in Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amio acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." + ], + "score": { + "usability_domain_length": 1958 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000284/3.0", + "usability_domain": [ + "Apoptosis is an important process for maintaining homeostasis, eliminating cells that have been damaged beyond repair, and preventing cancer. If there is a problem with apoptosis, it could lead to uncontrolled cell proliferation and development of a tumor. ", + "Proteins of the B-cell lymphoma-2 (Bcl-2) family are known to regulate programmed cell death through the mitochondrial apoptosis pathway. 
They are therefore considered pro-survival proteins given that a major characteristic of cancer is its ability to avoid programmed cell death. This makes them a therapeutic target for drug discovery. When there is intracellular stress, the apoptotic homeostasis is controlled by the activator and sensitizer BH3 (Bcl-2 homology 2)-only proteins. Changes in expression levels of these proteins can lead to an imbalance. More specifically, overexpression can cause oncogenic effects. Normally, the Bcl-2 family proteins will bind to the BH3 motif of pro-apoptotic proteins to create a network of protein-protein interactions, but any dysfunction will result in the cancer cells to evade cell death. This study emphasizes the importance of future experimental research on the protein network between Bcl-2 and BH3 within a breast cancer context.", + "Bioinformatic approaches were done in order to link -omics with structural data. There was no change in the parameters from the default. This BCO represents the steps taken to identify protein interaction partners of the Bcl-2 family members that contain the BH3 motif in breast cancer samples. This was done by first retrieving the experimentally known Bcl-2 family interactions from the human Integrated Interaction Database (IID), and then the interaction list was filtered to only include those proteins containing the BH3 motif. Out of the 560 protein-protein interactions that were collected, 295 of them were selected as possible BH3-containing proteins. Out of the 295 proteins, 282 were identified as BH-3 only. 
" + ], + "score": { + "usability_domain_length": 1958 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000024/v-2.1.1", + "usability_domain": [ + "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).", + "The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.", + "BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.", + "The dataset is imported and contributed by BioMuta and processed by GlyGen. If you use this dataset please provide proper attribution to BioMuta and GlyGen. ", + "Note the mutation shown in GlyGen are annotated based on passing of one or more filter criterias.", + "Filter criteria 1 - Positive Patient's Frequency Ratio \u2265 1.0%", + "Filter criteria 2 - Presence of mutation in number of cancer types \u2265 3 cancer types", + "Filter criteria 3 - Positive Patient's count \u2265 10 patients", + "Filter criteria 4 - Mutation reported and annotated in number of databases", + "Filter criteria 5 - Mutation minned from the literature using literaure mining tools (human_protein_mutation_literature.csv)", + "If there is also a germline mutation reported in dbSNP, the rsid is also shown", + "These filter criteria annotations are shown on the front end under the column Annotation Name", + "Also if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.", + "When mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen", + "Two different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.", + "On the GlyGen interface, in 
Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amio acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." + ], + "score": { + "usability_domain_length": 1958 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000282/1.2.1", + "usability_domain": [ + "Natural killer (NK) cells are very diverse. They play major roles in hominid immunity and reproduction (10.3389/fimmu.2019.00177). Killer cell immunoglobulin- like receptors are key receptors for NK cells development and function in humans (https://doi.org/10.5045/kjh.2011.46.4.216). KIR's genes span 10-16kb each and recombination through the different haplotypes has made their order and copy number highly variable. Their genes encode proteins that recognize human leukocyte antigen (HLA) that initiate signaling pathways in NK cells. This leads to the release of cytokines or death of the target cell. ", + "Full haplotyping requires physical separation and amplification of maternal and paternal haplotypes via fosmids for subsequent sequencing. This approach is high cost and cannot be used for high scale studies. It is also difficult to interpret KIR haplotypes for an individual human genome considering the reads from high-throughput sequencing cannot be deciphered because the structural arrangements are unknown. This requires a more specific and careful interpretation of the KIR region than in most other regions in the human genome. The pipeline described here is used to capture, sequence, assemble and annotate diploid human KIR haplotypes. This approach is meant to efficiently capture 2-8kb fragments of DNA for better identification of haplotypes. ", + "In this study a cohort of 8 African Americans and 8 Europeans were used and the results demonstrated that every KIR gene and intergene contains constant regions that are targetable by capture probes. 
By targeting the constant regions the variable regions can be captured and sequenced by standard PacBio workflows. This approach also requires no prior knowledge of the individual or references, while only utilizing the standard lab workflows with access to free and open software. " + ], + "score": { + "usability_domain_length": 1846 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000269/1.0", + "usability_domain": [ + "Reliably annotated microbial genomes in public databases have been decreasing over the years and this could be due to automatic annotation of microbial genomes rather than doing a manual annotation. These poor annotations of automated microbial annotation pipelines are looking for the first/best hit and not based on the experimentally verified annotation. ", + "It\u2019s important to have a more reliable and accurate genome annotation as it\u2019s one of the key things to understand the complete genetic material of many organisms.", + "Multi-threaded Enhanced prokaryotic Genome Annotator (MEGAnnotator) is a bioinformatics pipeline that generates annotated GenBank files for microbial genomes using DNA shotgun sequencing reads.", + "It is beneficial for microbiologist researchers interested in genome analyses of bacteria and microbiome of organisms as it provides reduced redundant annotations and is efficient in pre-arranging assembly and annotation work to process NGS genome sequence data.", + "It is also a user-friendly pipeline that allows the annotation of complex genome analyses for investigators that cannot prepare their own bioinformatics pipeline.", + "Furthermore, it helps solve the problem of inaccurately automated annotation of microbial genomes, improves the quality of the microbial genome annotation, and allows the user to annotate partially pre-assembled genomes, as well as assemble metagenomic data sets.", + "As shown by the results where the number of contigs were reduced for each organism's output , MEGAnnotator is able 
to accurately depict and annotate the genome assembly that it curates. Thus allowing a more accurate and efficient (by lowering the time) way of genome annotation compared to other publicly accessed bioinformatics pipeline with automated annotation." + ], + "score": { + "usability_domain_length": 1764 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000295/0.1", + "usability_domain": [ + "This BCO represents an ALS gene scanning pipeline, through the adaption of a fast and efficient bioinformatics pipeline called DNAscan. DNAscan allows for the analysis of DNA next generation sequencing data, using very little memory usage and computational effort. ", + "This pipeline allows for the identification of genetic factors characterized by ALS, utilizing the identification of variants in more than 25 genes with a very strong correlation to ALS, in addition to variants in more than 120 genes with a weaker correlation to ALS. As a result of the rapidly increasing availability of next-generation sequencing data, patients and health care professionals are obtaining genomic information without the ability to evaluate and deduce their findings. In addition to this phenomenon, the relevance of variants in ALS genes is not easily evident. 
With the implementation of this pipeline, patients and providers are able to obtain an easily accessible tool that can provide an automatic and comprehensive annotated report on a list of ALS genes from whole-genome and whole-exome sequencing data on a typical computer in less than 5 hours.", + "The ALS gene scanning pipeline performs alignment, variant calling, structural variant calling, repeat expansion calling, and variant annotation using Annovar.", + "ALSgeneScanner restricts the analysis to a subset of genes associated with ALS, prioritizing variants according to scientific evidence of the gene association and the effect prediction of the variant.", + "The required input genome or exome sequencing data should be taken from ALS patients, which will then be referenced to hg19 or grch37." + ], + "score": { + "usability_domain_length": 1630 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000295/0.10", + "usability_domain": [ + "This BCO represents an ALS gene scanning pipeline, through the adaption of a fast and efficient bioinformatics pipeline called DNAscan. DNAscan allows for the analysis of DNA next generation sequencing data, using very little memory usage and computational effort. ", + "This pipeline allows for the identification of genetic factors characterized by ALS, utilizing the identification of variants in more than 25 genes with a very strong correlation to ALS, in addition to variants in more than 120 genes with a weaker correlation to ALS. As a result of the rapidly increasing availability of next-generation sequencing data, patients and health care professionals are obtaining genomic information without the ability to evaluate and deduce their findings. In addition to this phenomenon, the relevance of variants in ALS genes is not easily evident. 
With the implementation of this pipeline, patients and providers are able to obtain an easily accessible tool that can provide an automatic and comprehensive annotated report on a list of ALS genes from whole-genome and whole-exome sequencing data on a typical computer in less than 5 hours.", + "The ALS gene scanning pipeline performs alignment, variant calling, structural variant calling, repeat expansion calling, and variant annotation using Annovar.", + "ALSgeneScanner restricts the analysis to a subset of genes associated with ALS, prioritizing variants according to scientific evidence of the gene association and the effect prediction of the variant.", + "The required input genome or exome sequencing data should be taken from ALS patients, which will then be referenced to hg19 or grch37." + ], + "score": { + "usability_domain_length": 1630 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_022574/1.0", + "usability_domain": [ + "The purpose of this study is to identify the integration sites of retroviruses including a common one which is the HIV virus. The pipeline describes the steps after the DNA has been clones and sequences. The sequencing left the sample with artifacts that are then reduced using a bioinformatics pipeline. The first step is to differentiate reads by using their sample index as well as identification of both 5\u2019and 3\u2019 ends. Next, reference genomes are used (human sequenced genomes that were infected with the HIV virus) for genome alignment using a BLAST-like tool. The BLAT alignment allows for similar sequences to be classified at the chromosomal level based on a score system. The pipeline then removes sequences that do not match or that are not on the same chromosomes as artifacts. The alignment requires reads to be on the same chromosome, on opposite strands, with a size of not more than 1kb. The host breakpoint and the LTR DNA junctions only is considered a valid integration sites when those main criteria are met. 
In fact, this tool allows for the results to be less noisy as artifacts are removed thoroughly for better genomic outcomes. The pipeline predicts priming sites from the sequenced genomes and provides raw data sequence for the discovered integrations sites. The pipeline analysis was established and published on BMC genomics. The citation is as follows Wells, D.W., Guo, S., Shao, W. et al. An analytical pipeline for identifying and mapping the integration sites of HIV and other retroviruses. BMC Genomics 21, 216 (2020). https://doi.org/10.1186/s12864-020-6647-4" + ], + "score": { + "usability_domain_length": 1595 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.4", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool. default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. 
\nThe primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for all file types with the designation of ngsQC*, siteQC*, assembleyQC* and biosampleMeta* is following a consistent representation of the data properties.\n*files are of HIVE or NCBI origin. " + ], + "score": { + "usability_domain_length": 1548 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.5", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool. default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. 
\nThe primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for all file types with the designation of ngsQC*, siteQC*, assembleyQC* and biosampleMeta* is following a consistent representation of the data properties.\n*files are of HIVE or NCBI origin. " + ], + "score": { + "usability_domain_length": 1548 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.4", + "usability_domain": [ + "List of controlled vocabulary terms for data.ARGOSdb Annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool; default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. Currently all terms are sourced from the DRM_all_orgs.tsv (ARGOS_000055). The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." 
+ ], + "score": { + "usability_domain_length": 1532 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.5", + "usability_domain": [ + "List of controlled vocabulary terms for data.ARGOSdb Annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool; default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. Currently all terms are sourced from the DRM_all_orgs.tsv (ARGOS_000055). The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." 
+ ], + "score": { + "usability_domain_length": 1532 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.0", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1469 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.1", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. 
Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1469 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.2", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1469 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000016/1.3", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1469 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000271/1.0", + "usability_domain": [ + "This BCO represents a pipeline called SEARCHIN, which has the ability to identify ligand-mediated interactions between different cellular compartments. The output of SEARCHIN consists of an extensively reduced list of potential interactions between ligands produced by one compartment and receptors produced in another compartment, formulating a ranked list. From this list, candidates for experimental validation can be effectively selected.", + "The pipeline was assessed through a model of amyotrophic lateral sclerosis (ALS), in which astrocytes expressing mutant superoxide dismutase-1 destroy wild-type motor neurons through an undetermined mechanism. The pipeline inferred that the top predicted ligand-receptor pairs as an interaction between astrocyte-released amyloid precursor protein and death receptor-6 on motor neurons. 
This inferred deleterious role of amyloid precursor protein and death receptor-6 was later confirmed in vitro in models of ALS.", + "This pipeline consists of well-validated algorithms like CINDy, VIPER, and PrePPI, which were not originally designed to study cell-cell communication processes, to test this methodology by generating testable and reasonable hypotheses that can be experimentally validated. Each individual step maintains their own default parameters and thresholds which are based on extensive benchmarks performed in previous original manuscripts in which the algorithms were originally produced." + ], + "score": { + "usability_domain_length": 1437 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.3", + "usability_domain": [ + "List of non-core controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property. The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." + ], + "score": { + "usability_domain_length": 1433 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.0", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "score": { + "usability_domain_length": 1422 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.1", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "score": { + "usability_domain_length": 1422 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000017/1.2", + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. 
Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "score": { + "usability_domain_length": 1422 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000300/1.0", + "usability_domain": [ + "Bovine tuberculosis (bTB) is an epidemic disease in cattle as a result of infection with Mycobacterium bovis. While 99.95% similar to the human variant, M. tuberculosis, this disease primarily infects bovids like cattle and bison. The typical response in a positive infection is to slaughter the infected cattle, and thus the economic weight of bTB is extremely high.", + "Aerosolized mycobacterium interact with bovine alveolar macrophages (bAM) upon inhalation and either begin replicating or are cleared by the adaptive immune response. 
The bacteria use several mechanisms for immune evasion: inactivation of ROS and RNI, molecular mimicry, reduction of interferon signaling, etc. ", + "This study seeks to understand the genetic predisposition that certain cattle have for reducing the likelihood of a bTB establishment by using data from genome-wide association studies (GWAS) to compare the bAM genomic similarities and differences in cattle. ", + "Differentially expressed genes were analyzed using DESeq2 and this genomic data was run through the R suite DGCA and Cytoscape to generate the correlation networks at 24 hpi and 48 hpi. Of the three different pipelines used to analyze these infected bAM for gene-gene alterations, only the correlation network approach provided data on all three types of cattle tested in a post-infection context." + ], + "score": { + "usability_domain_length": 1334 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.0", + "usability_domain": [ + "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. 
source/type def - The data source for obtaining the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1328 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.1", + "usability_domain": [ + "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." 
+ ], + "score": { + "usability_domain_length": 1328 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.2", + "usability_domain": [ + "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." + ], + "score": { + "usability_domain_length": 1328 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_016880/1.0", + "usability_domain": [ + "The purpose of this study is to identify the integration sites of retroviruses including a common one which is the HIV virus. The pipeline describes the steps after the DNA has been clones and sequences. The sequencing left the sample with artifacts that are then reduced using a bioinformatics pipeline. 
The first step is to differentiate reads by using their sample index as well as identification of both 5\u2019and 3\u2019 ends. Next, reference genomes are used (human sequenced genomes that were infected with the HIV virus) for genome alignment using a BLAST-like tool. The BLAT alignment allows for similar sequences to be classified at the chromosomal level based on a score system. The pipeline then removes sequences that do not match or that are not on the same chromosomes as artifacts. The alignment requires reads to be on the same chromosome, on opposite strands, with a size of not more than 1kb. The host breakpoint and the LTR DNA junctions only is considered a valid integration sites when those main criteria are met. In fact, this tool allows for the results to be less noisy as artifacts are removed thoroughly for better genomic outcomes. The pipeline predicts priming sites from the sequenced genomes and provides raw data sequence for the discovered integrations sites. " + ], + "score": { + "usability_domain_length": 1287 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.5", + "usability_domain": [ + "Core and Annotation definitions and properties of the controlled vocabulary (data dictionary) used by data.ARGOSdb data sets.", + "List of controlled vocabulary terms for data.ARGOSdb.org datasets and data properties. This data dictionary was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on data.ARGOSdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in data.ARGOSdb. A README and Release Notes are available per each version of the Data Dictionary. 
The primary use case for the data dictionary is to ensure all data submitted to data.argosdb.org is following a consistent representation of the data properties. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_files - A `|` separated list of dataset names where this property is utilized; recommended - The person or resource that suggested using the property; description - A definition and additional information about the property; source_or_type_def - The data source for obtaining the property." + ], + "score": { + "usability_domain_length": 1275 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.6", + "usability_domain": [ + "Core and Annotation definitions and properties of the controlled vocabulary (data dictionary) used by data.ARGOSdb data sets.", + "List of controlled vocabulary terms for data.ARGOSdb.org datasets and data properties. This data dictionary was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on data.ARGOSdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in data.ARGOSdb. A README and Release Notes are available per each version of the Data Dictionary. The primary use case for the data dictionary is to ensure all data submitted to data.argosdb.org is following a consistent representation of the data properties. 
The following are the column headers and their meaning: property - Consensus name for data property described in row; data_files - A `|` separated list of dataset names where this property is utilized; recommended - The person or resource that suggested using the property; description - A definition and additional information about the property; source_or_type_def - The data source for obtaining the property." + ], + "score": { + "usability_domain_length": 1275 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000015/1.3", + "usability_domain": [ + "Core and Non-core definitions and properties of the controlled vocabulary (data dictionary) used by ARGOSdb data sets.", + "Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of definitions and properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property. The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org (Core or Non-Core) are following a consistent representation of the data properties so that they can be easily integrated, as well as QC'd for outliers. 
" + ], + "score": { + "usability_domain_length": 1274 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000084/v-2.0.2", + "usability_domain": [ + "The Human Protein Diseases dataset contains list of human [taxid:9606] diseases from OMIM, MONDO and Genomics England databases mapped to the UniProtKB protein canonical accessions and Disease Ontology (DO) identifiers..", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship.", + "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest.", + "The Disease Ontology has been developed as a standardized ontology for human disease with the purpose of providing the biomedical community with consistent, reusable and sustainable descriptions of human disease terms, phenotype characteristics and related medical vocabulary disease concepts through collaborative efforts of biomedical researchers, coordinated by the University of Maryland School of Medicine, Institute for Genome Sciences.", + "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, DO, and EMBL-EBI-UniProt and GlyGen" + ], + "score": { + "usability_domain_length": 1191 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000084/v-2.1.1", + "usability_domain": [ + "The Human Protein Diseases dataset contains list of human [taxid:9606] diseases from OMIM, MONDO and Genomics England databases mapped to the UniProtKB protein canonical accessions and Disease Ontology (DO) identifiers..", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship.", + "The Monarch Initiative is focused primarily on 
phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest.", + "The Disease Ontology has been developed as a standardized ontology for human disease with the purpose of providing the biomedical community with consistent, reusable and sustainable descriptions of human disease terms, phenotype characteristics and related medical vocabulary disease concepts through collaborative efforts of biomedical researchers, coordinated by the University of Maryland School of Medicine, Institute for Genome Sciences.", + "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, DO, and EMBL-EBI-UniProt and GlyGen" + ], + "score": { + "usability_domain_length": 1191 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000268/v1.0.0", + "usability_domain": [ + "The LAMPS analysis pipeline facilitates the analysis of Ligation-Mediated Amplification (LMA) sequencing data and provides a thorough assessment of a library\u2019s reads for a variety of experimental parameters", + "LAMPS is the first pipeline to provide quality control reporting of LMA primers, allowing for easy identification of problematic primer pairs during design and data analysis of LMA experiments", + "LAMPS is a three-step pipeline which maps reads from a BAM or FASTQ file to expected alignments including a barcode, forward primer, and reverse primer, provides quality control reports for reads that are either too short or do not map to the database, and normalizes the read counts to reads per million (RPM) to allow for easy integration with downstream analysis pipelines.", + "Because LAMPS can be used with 5C or 2C-ChIP protocols and the use of either BLAST or Bowtie 2, output files listed in this BCO are limited to outputs that would be obtained using any method. Because of this, not all outputs are included. 
Outputs that are included are named as if 2C-ChIP data was used.", + "Either BLAST or Bowtie 2 can be used for LAMPS. SAMtools is only required if sequencing files are in BAM format." + ], + "score": { + "usability_domain_length": 1189 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000099/1.0.36", + "usability_domain": [ + "A0403 is the cobas EGFR Mutation Test v2 is a real-time PCR test for the qualitative detection of defined mutations of the epidermal growth factor receptor (EGFR) gene in non-small cell lung cancer (NSCLC) patients. Defined EGFR mutations are detected using DNA isolated from formalin-fixed paraffin-embedded tumor tissue (FFPET) or circulating-free tumor DNA (cfDNA) from plasma derived from EDTA anti-coagulated peripheral whole blood.The test is indicated as a companion diagnostic to aid in selecting NSCLC patients for treatment with the targeted therapies listed ... Drug FFPET PlasmaTARCEVA (erlotinib) Exon 19 deletions and L858R Exon 19 deletions and L858R TAGRISSO (osimertinib) 790M T790M ... Table 2 below that are also detected by the cobas EGFR Mutation Test v2: Table 2Drug FFPET PlasmaTARCEVA (erlotinib) G719X, exon 20 insertions, T790M, S768I and L861Q G719X, exon 20 insertions, T790M, S768I and L861QTAGRISSO (osimertinib) G719X, exon 19 deletions, L858R, exon 20 insertions, S768I, and L861Q G719X, exon 19 deletions, L858R, exon 20 insertions, S768I, and L861Q [FTCID:P150045]" + ], + "score": { + "usability_domain_length": 1102 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_019942/1.0", + "usability_domain": [ + "In Case Study 2, the objective of CAncer bioMarker Prediction Pipeline (CAMPP) was to analyze N-glycan abundances from tumor (TIF) and normal interstitial fluids (NIF) and serum via LC-MS/MS. Data was collected from 90 female breast cancer patients and the total number N-glycan groups identified were 165. 
This study demonstrates the relationship of N-glycosylation of breast cancer both in vitro and in vivo. Variable selection with CAMPP is implemented through linear models from mass spectrometry data for differential abundance analysis (DAA) and LASSO/Elastic-Net Regression. The result of this pipeline are tabular files and graphical representations of the dataset being studied. The biomolecules of interest from a large dataset can be found by applying different statistical tests and machine learning approaches. CAMPP creates a standardized way to screening for cancer biomarkers and other biomolecules of interest before their implementation in potential experiments. This pipeline was created based on the work of Terkelsen et al. doi: 10.1371/journal.pcbi.1007665" + ], + "score": { + "usability_domain_length": 1078 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_063167/1.0", + "usability_domain": [ + "Pipeline for interrogating Telomerase RNA Component (TERC) - chromatin interaction. Three libraries were assembled: HiChIRP TERC sample from human female B lymphoblastoid cell line (GM12878) as well as RNase (GM12878) and HeLa cell line TERC-knockout samples as negative controls. Libraries were generated by performing HiChIP/HiChIRP (https://doi.org/10.1038/s41592-019-0407-x) and Illumina paired-end sequencing on the samples. Sequencing data is processed according to standard protocol, including alignment, duplicate removal and filtering with HiC-Pro, before generating interaction matrices and calling loops with Juicer HiCCUPS algorithm. The pipeline will produce a list of high-confidence loop calls. In the original experiment the examination of the loops indicated a high number of telomere-telomere interaction (92%); additionally, the results showed that TERC was also associated with loops beyond telomeric regions, specifically at enhancer-promoter regions of several oncogenes, implying possible role of TERC beyond telomeres." 
+ ], + "score": { + "usability_domain_length": 1043 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_019748/1.1", + "usability_domain": [ + "Shotgun Metagenomic Sequencing, generating paired-end reads, was performed on stool samples from individuals from different regions of the Democratic Republic of Congo (DRC). Some cohorts serve as geographical and disease controls, while disease cohorts have individuals with konzo. Konzo is a distinct upper motor neuron disease prevalent in sub-Saharan Africa, and consumption of bitter cassava, high in cyanogenic glycosides, is implicated in the disease. Here, Kraken2 and Bracken (developed by JHU) are used to assess the gut microbiome of individuals from the DRC, and downstream analysis of the differences in bacterial composition can be determine using the final output from this pipeline. This pipeline removes host reads (using BMtagger) from the paired-end reads fastq files generated from four lanes per sample, removes Illumina adapters (can be modified for the specific adapter sequence) (using skewer), and assigns the reads to different taxonomic classification (with standard database) using Kraken2 and Bracken." + ], + "score": { + "usability_domain_length": 1030 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.5", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT) for the original FDA BioProject and the National Collection of Pathogenic Viruses-sc-4123.", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted data (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221, National Collection of Pathogenic Viruses). 
Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 1022 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000038/1.0", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT) for the original FDA BioProject and the National Collection of Pathogenic Viruses-sc-4123.", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted data (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221, National Collection of Pathogenic Viruses). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 1022 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000142/v-2.0.2", + "usability_domain": [ + "The dataset provides information on O-glycosylation sites on Human proteins. The data is submitted by Dr.Christina Woo from Department of Chemistry and Chemical Biology, Harvard University. 
The GlyTouCan accession (G70994MS)is annotated to the glycan composition based on author's recommendation. Author's note: \"For the glycan assignment, it would be most technically correct to use a HexNAc molecule, although by and large, these are O-GlcNAc modifications, we cannot be 100% certain for every single one\". The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). The source publication:Woo CM, Lund PJ, Huang AC, Davis MM, Bertozzi CR, Pitteri SJ. Mapping and Quantification of Over 2000 O-linked Glycopeptides in Activated Human T Cells with Isotope-Targeted Glycoproteomics (Isotag). Mol Cell Proteomics. 2018;17(4):764-75. doi: 10.1074/mcp.RA117.000261. PubMed PMID: 29351928; PubMed Central PMCID: PMCPMC5880114)." + ], + "score": { + "usability_domain_length": 992 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000142/v-2.1.1", + "usability_domain": [ + "The dataset provides information on O-glycosylation sites on Human proteins. The data is submitted by Dr.Christina Woo from Department of Chemistry and Chemical Biology, Harvard University. The GlyTouCan accession (G70994MS)is annotated to the glycan composition based on author's recommendation. Author's note: \"For the glycan assignment, it would be most technically correct to use a HexNAc molecule, although by and large, these are O-GlcNAc modifications, we cannot be 100% certain for every single one\". The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). The source publication:Woo CM, Lund PJ, Huang AC, Davis MM, Bertozzi CR, Pitteri SJ. Mapping and Quantification of Over 2000 O-linked Glycopeptides in Activated Human T Cells with Isotope-Targeted Glycoproteomics (Isotag). Mol Cell Proteomics. 2018;17(4):764-75. doi: 10.1074/mcp.RA117.000261. PubMed PMID: 29351928; PubMed Central PMCID: PMCPMC5880114)." 
+ ], + "score": { + "usability_domain_length": 992 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000100/1.0.36", + "usability_domain": [ + "A0404 is a biomarler panel device that is indicated for: the THERASCREEN EGFR RGQ PCR KIT is a real-time pcr test for the qualitative detection of exon 19 deletions and exon 21 (L858R) substitution mutations of the epidermal growth factor receptor (EGFR) gene in DNA derived from formalin-fixed paraffin-embedded (FFPE) non-small cell lung cancer (NSCLC) tumor tissue. the test is intended to be used to select patients with NSCLC for whom gilotrjf (afatinib), an EGFR TYROSINE KINASE INHIBITOR (TKI), is indicated. safety and efficacy of gilotrif (afatinib) have not been established in patients whose tumors have L861Q, G719X, 87681, exon 20 insertions, and T790M mutations, which are also detected by the THERASCREEN\u00a0 EGFR RGQ PCR KIT. specimens are processed using the QIAAMP\u00a0 DSP DNA FFPE TISSUE KIT for manual sample preparation and the rotor-gene\u00a0 Q MDX instrument for automated amplification and detection. [FTCID:P120022]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 987 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000404/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref Pharos dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pharos UniProtKB accessions. The data is from Pharos API based on TCRD version 5.4.0. 
If you use this dataset please provide proper attribution to Pharos and GlyGen", + "Pharos is the user interface to the Knowledge Management Center (KMC) for the Illuminating the Druggable Genome (IDG) program funded by the National Institutes of Health.", + "The goal of KMC is to develop a comprehensive, integrated knowledge-base for the Druggable Genome (DG) to illuminate the uncharacterized and/or poorly annotated portion of the DG, focusing on three of the most commonly drug-targeted protein families: G-protein-coupled receptors (GPCRs), ion channels (ICs) and kinases", + "Nguyen, D.-T., Mathias, S. et al, Pharos: Collating Protein Information to Shed Light on the Druggable Genome , Nucl. Acids Res.i>, 2017, 45(D1), D995-D1002. DOI: 10.1093/nar/gkw1072", + "https://pharos.nih.gov/about" + ], + "score": { + "usability_domain_length": 981 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000404/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref Pharos dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pharos UniProtKB accessions. The data is from Pharos API based on TCRD version 5.4.0. If you use this dataset please provide proper attribution to Pharos and GlyGen", + "Pharos is the user interface to the Knowledge Management Center (KMC) for the Illuminating the Druggable Genome (IDG) program funded by the National Institutes of Health.", + "The goal of KMC is to develop a comprehensive, integrated knowledge-base for the Druggable Genome (DG) to illuminate the uncharacterized and/or poorly annotated portion of the DG, focusing on three of the most commonly drug-targeted protein families: G-protein-coupled receptors (GPCRs), ion channels (ICs) and kinases", + "Nguyen, D.-T., Mathias, S. et al, Pharos: Collating Protein Information to Shed Light on the Druggable Genome , Nucl. Acids Res.i>, 2017, 45(D1), D995-D1002. 
DOI: 10.1093/nar/gkw1072", + "https://pharos.nih.gov/about" + ], + "score": { + "usability_domain_length": 981 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000043/1.0.36", + "usability_domain": [ + " List of human [taxid:9606] genes with healthy RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at (https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx) Only the subset of RNA-Seq data are used to generate the expression profiles for healthy individuals for human used by OncoMX. From this data, a custom format of healthy data was generated for human containing the following information: Ensembl gene ID and UniProtKB accessions, Uberon anatomical entity IDs and names, Uberon developmental stage IDs and names, qualitative (high, medium, low, absent) reported expression levels for a queried gene with respect to all genes in a given tissue, similarly qualitative reported expression levels for a queried gene with respect to that same gene's expression across all tissues, the quality associated with the call, and a quantitative expression score based on ranks." + ], + "score": { + "usability_domain_length": 980 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000010/1.0", + "usability_domain": [ + "Reference-guided genome assemblies generated by the FDA-Argos Team using HIVE platform tools.", + "Paired-end fastq files are aligned to a NCBI GenBank sourced reference genome with HIVE-Hexagon, and a consensus fasta is obtained by HIVE-Heptagon. QC'ing for frameshift deletions and/or stop codons are detected under Phase Mutation tab in HIVE-Heptagon. The resulting assembly metrics are then analyzed using the Quast quality assessment tool. 
The FASTA header definition lines are generated by (example header included): (Example Header) >FA02SRR17261988|Marburg OS=Marburg marburgvirus OX=448086 OV=IRF0328_MARV_CI67 SV=1 RG=NC_001608.3; (Definitions): FA02SRR17261988 = Unique ID, FA=fasta, 02=#, SRR17261988 = sra_run_id; Marburg = UniProtKB Entry name; OS = Organism Name = Marburg marburgvirus; OX = Organism Identifier/Taxonomy = 448086; OV = Organism Variant Name = IRF0328_MARV_CI67; SV = Sequence Version = 1; RG = Reference Guided = NC_001608.3" + ], + "score": { + "usability_domain_length": 952 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000010/1.1", + "usability_domain": [ + "Reference-guided genome assemblies generated by the FDA-Argos Team using HIVE platform tools.", + "Paired-end fastq files are aligned to a NCBI GenBank sourced reference genome with HIVE-Hexagon, and a consensus fasta is obtained by HIVE-Heptagon. QC'ing for frameshift deletions and/or stop codons are detected under Phase Mutation tab in HIVE-Heptagon. The resulting assembly metrics are then analyzed using the Quast quality assessment tool. The FASTA header definition lines are generated by (example header included): (Example Header) >FA02SRR17261988|Marburg OS=Marburg marburgvirus OX=448086 OV=IRF0328_MARV_CI67 SV=1 RG=NC_001608.3; (Definitions): FA02SRR17261988 = Unique ID, FA=fasta, 02=#, SRR17261988 = sra_run_id; Marburg = UniProtKB Entry name; OS = Organism Name = Marburg marburgvirus; OX = Organism Identifier/Taxonomy = 448086; OV = Organism Variant Name = IRF0328_MARV_CI67; SV = Sequence Version = 1; RG = Reference Guided = NC_001608.3" + ], + "score": { + "usability_domain_length": 952 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000624/v-2.0.2", + "usability_domain": [ + "This mouse glycogenes dataset is retrieved from stem cell analysis which was done with qRT-PCR analysis [PMID:18411279]. 
It contains a list of UniProtKB accessions full and shortened mouse[taxonomy:10090] protein names mapped to NCBI gene and MGI IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy. " + ], + "score": { + "usability_domain_length": 949 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000624/v-2.1.1", + "usability_domain": [ + "This mouse glycogenes dataset is retrieved from stem cell analysis which was done with qRT-PCR analysis [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened mouse[taxonomy:10090] protein names mapped to NCBI gene and MGI IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy. 
" + ], + "score": { + "usability_domain_length": 949 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_014961/1.0", + "usability_domain": [ + "Differential expression analysis based on RNA-seq data is a common method of transcriptomic analysis. Reads generated from an RNA-seq experiment are mapped to a reference genome and the resulting quantification of transcripts can be compared between case and control; potentially highlighting transcriptional (and therefore physiological) differences between samples. This paper is concerned with correctly identifying differentially expressed genes from RNA-seq data. The paper evaluates six read mapping methods and nine methods of differential expression analysis. Specifically, this pipeline uses real, RNA-Seq data from the Microarray Quality Control (MAQC) project and assesses the overlap of mapping results and differential expression results between tools. The pipeline that will become a BCO will take the RNA-seq data and use both python and R scripts to run analyses and generate a report of overlap results." + ], + "score": { + "usability_domain_length": 920 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_054419/1.0", + "usability_domain": [ + "Shotgun Metagenomic Sequencing, generating paired-end reads, was performed on stool samples from individuals from different regions of the Democratic Republic of Congo. Some cohorts serve as geographical and disease controls, while disease cohorts have individuals with konzo. Konzo is a distinct upper motor neuron disease, and consumption of bitter cassava, high in cyanogenic glucosides, is implicated in the disease. Here, Kraken2 and Bracken (developed by JHU) are used to assess the gut microbiome of individuals from the DRC, and downstream analysis of the differences in bacterial composition can be determine using the final output from this pipeline. 
This pipeline removes host reads (using BMtagger), removes Illumina adapters (can be modified for the specific adapters used) (using skewer), and assigns the reads to different taxonomic classification (with standard database) using Kraken2 and Bracken. " + ], + "score": { + "usability_domain_length": 915 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000623/v-2.0.2", + "usability_domain": [ + "This Human glycogenes dataset is retrieved a publication from UGA [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened human [taxonomy:9606] protein names mapped to NCBI gene and HGNC IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy." + ], + "score": { + "usability_domain_length": 911 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000623/v-2.1.1", + "usability_domain": [ + "This Human glycogenes dataset is retrieved a publication from UGA [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened human [taxonomy:9606] protein names mapped to NCBI gene and HGNC IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. 
If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy." + ], + "score": { + "usability_domain_length": 911 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.0", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the FDA ARGOS BioProject (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 894 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.1", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). 
Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the FDA ARGOS BioProject (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 894 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.2", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the selected BioProjects. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 880 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.4", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). 
", + "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the selected BioProjects. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 880 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000102/1.0.36", + "usability_domain": [ + "A0406 is the therascreen KRAS RGQ PCR Kit is a real-time qualitative PCR assay used on the Rotor-Gene Q MDx instrument for the detection of seven somatic mutations in the human KRAS oncogene, using DNA extracted from formalin-fixed paraffin-embedded (FFPE), colorectal cancer (CRC) tissue. The therascreen KRAS RGQ PCR Kit is intended to aid in the identification of CRC patients for treatment with Erbitux (cetuximab) and Vectibix (panitumumab) based on a KRAS no mutation detected test result. The QIAGEN therascreen KRAS RGQ PCR Kit contains reagents for eight separate reactions; seven mutation specific reactions to amplify and detect mutations in codons 12 and 13 in exon 2 of the KRAS oncogene, and one Control Reaction that amplifies and detects a region of exon 4 in the KRAS oncogene. [FTCID:P110027]. This panel is curated from the FDA Approved biomarkers." 
+ ], + "score": { + "usability_domain_length": 867 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_069535/1.0", + "usability_domain": [ + "A pipeline for curating the FDA's RVDB (v. 18) database of viral species. Pipeline was developed by Merck and Co., and is based on Lu and Salzberg (10.1371/journal.pcbi.1006277). Pipeline begins by i) converting viral genomes into pseudo reads, ii) alignment of pseudo reads to target database, iii) low-complexity masking, iv) masking viral genomes based on pseudo read classification, and v) addition of HIVE-specific sequence headers for integration into HIVE software. Pipeline includes a wrapper. See GitHub repository (linked in Extension Domain) for more information. The wrapper script reference (https://github.com/Merck/curation-open-source/blob/master/run_curation.sh) bundles the entire pipeline and enables execution in an HPC environment up to 200GB memory (which is required for Kraken2). This implementation was submitted using the qsub utility." + ], + "score": { + "usability_domain_length": 861 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_069535/2.0", + "usability_domain": [ + "A pipeline for curating the FDA's RVDB (v. 18) database of viral species. Pipeline was developed by Merck and Co., and is based on Lu and Salzberg (10.1371/journal.pcbi.1006277). Pipeline begins by i) converting viral genomes into pseudo reads, ii) alignment of pseudo reads to target database, iii) low-complexity masking, iv) masking viral genomes based on pseudo read classification, and v) addition of HIVE-specific sequence headers for integration into HIVE software. Pipeline includes a wrapper. See GitHub repository (linked in Extension Domain) for more information. The wrapper script reference (https://github.com/Merck/curation-open-source/blob/master/run_curation.sh) bundles the entire pipeline and enables execution in an HPC environment up to 200GB memory (which is required for Kraken2). 
This implementation was submitted using the qsub utility." + ], + "score": { + "usability_domain_length": 861 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000085/1.0.36", + "usability_domain": [ + "A list of human (taxid:9606) biomarkers, for COVID-19 (DOID:0080600), including overlap with diabetes mellitus (DOID:9351), and 15 high-level categories of cancer (DOID:162), manually curated from scientific publications recorded in PubMed (https://pubmed.ncbi.nlm.nih.gov/) and also retrieved from EDRN (Early Detection Research Network; https://edrn.nci.nih.gov/), FDA (U.S. Food and Drug Administration; https://www.fda.gov/). EDRN is an NCI collaboration dedicated to discovery of (early) cancer biomarkers. Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen, biomarker, and disease types; the biomarker entity and its measurement modality (e.g., increased expression); and cross reference (programmatic linkage or resource ID) to established data repositories, models, and codes." + ], + "score": { + "usability_domain_length": 852 + } + }, + { + "object_id": "https://biocomputeobject.org/DEMO_000001/0.50.0", + "usability_domain": [ + "The workflow starts with selecting ACE2 as the search term. More information about the gene was then obtained with the MyGene.info API [1,2]. Next, the GlyGen database [3] was searched to identify a relevant set of proteins that originate from ACE2. The glycosylation data was extracted from the GlyGen protein response and prepared for presentation in the view metanode.\n\n1. Xin, J. et al. High-performance web services for querying gene and variant annotation. Genome Biology vol. 17 (2016). doi:10.1186/s13059-016-0953-9\n2. Wu, C., MacLeod, I. & Su, A. I. BioGPS and MyGene.info: organizing online, gene-centric information. Nucleic Acids Research vol. 41 D561\u2013D565 (2012). doi:10.1093/nar/gks1114\n3. York, W. S. et al. 
GlyGen: Computational and Informatics Resources for Glycoscience. Glycobiology vol. 30 72\u201373 (2019). doi:10.1093/glycob/cwz080" + ], + "score": { + "usability_domain_length": 849 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.5", + "usability_domain": [ + "QC of genome assemblies (FASTA files) using HIVE Platform. ", + "Genome assemblies are either selected from the FDA ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " + ], + "score": { + "usability_domain_length": 843 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.6", + "usability_domain": [ + "QC of genome assemblies (FASTA files) using HIVE Platform. ", + "Genome assemblies are either selected from the FDA ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. 
The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " + ], + "score": { + "usability_domain_length": 843 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.7", + "usability_domain": [ + "QC of genome assemblies (FASTA files) using HIVE Platform. ", + "Genome assemblies are either selected from the FDA-ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " + ], + "score": { + "usability_domain_length": 843 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.8", + "usability_domain": [ + "QC of genome assemblies (FASTA files) using HIVE Platform. ", + "Genome assemblies are either selected from the FDA-ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. 
Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " + ], + "score": { + "usability_domain_length": 843 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_027463/1.0", + "usability_domain": [ + "Alternative splicing is when multiple isoforms are made from the same gene due to alternative selection of exons or splice sites. Although alternative splicing generally produces linear RNA, one largely unexplored form leads to a circular form of RNA, called circRNA. There is a great amount of interest in these sequences and their interactions because they may be implicated in diseases such as cancer. As such, there have been numerous tools to find and detect circRNA sequences. Present tools only detect presence/absence of the sequence however and do not account for conditions where circRNA is differentially expressed, as it often is. SeekCRIT aims to solve this issue by offering detection, quantification, and statistical analysis for circRNAs. In this work, seekCRIT's capabilities are assessed using publicly available RNA data." + ], + "score": { + "usability_domain_length": 840 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000143/v-2.0.2", + "usability_domain": [ + "The dataset provides information on N-glycosylation sites on Human proteins. The data has been processed from the supplementary material from 2 publications (1. \"Deeb, S. J., Cox, J., Schmidt-Supprian, M., & Mann, M. (2013). N-linked Glycosylation Enrichment for In-depth Cell Surface Proteomics of Diffuse Large B-cell Lymphoma Subtypes. Molecular & Cellular Proteomics, 13(1), 240-251. doi:10.1074/mcp.m113.033977\" 2. \"Boersema, P. J., Geiger, T., Winiewski, J. R., & Mann, M. (2012). 
Quantification of the N-glycosylated Secretome by Super-SILAC During Breast Cancer Progression and in Human Blood Samples. Molecular & Cellular Proteomics, 12(1), 158-171. doi:10.1074/mcp.m112.023614\"). The listed proteins (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). " + ], + "score": { + "usability_domain_length": 821 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000143/v-2.1.1", + "usability_domain": [ + "The dataset provides information on N-glycosylation sites on Human proteins. The data has been processed from the supplementary material from 2 publications (1. \"Deeb, S. J., Cox, J., Schmidt-Supprian, M., & Mann, M. (2013). N-linked Glycosylation Enrichment for In-depth Cell Surface Proteomics of Diffuse Large B-cell Lymphoma Subtypes. Molecular & Cellular Proteomics, 13(1), 240-251. doi:10.1074/mcp.m113.033977\" 2. \"Boersema, P. J., Geiger, T., Winiewski, J. R., & Mann, M. (2012). Quantification of the N-glycosylated Secretome by Super-SILAC During Breast Cancer Progression and in Human Blood Samples. Molecular & Cellular Proteomics, 12(1), 158-171. doi:10.1074/mcp.m112.023614\"). The listed proteins (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). " + ], + "score": { + "usability_domain_length": 821 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000453/1.0", + "usability_domain": [ + "Identifying disease subtypes (cancer) is clinically very significant in patient survival prediction", + "It explicitly models patient survival as the objective and is predictive of new patient survival risks. 
DeepProg constructs a flexible ensemble of hybrid-models (a combination of deep-learning and machine learning models) and integrates their outputs following the ensemble learning paradigm.", + "DeepProg was applied on RNA-Seq, Methylation and miRNA data from 32 cancers in The Cancer Genome Atlas (TCGA),from NCBI, with a total of around 10,000 samples.", + "The results from the DeepProg method are compared to results from the Similarity Network Fusion (SNF) algorithm, used to identify cancer subtypes linked to survival by others. ", + "In all, DeepProg yields much better log-rank p values and C-indices than the SNF method. " + ], + "score": { + "usability_domain_length": 815 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000453/2.0", + "usability_domain": [ + "Identifying disease subtypes (cancer) is clinically very significant in patient survival prediction", + "It explicitly models patient survival as the objective and is predictive of new patient survival risks. DeepProg constructs a flexible ensemble of hybrid-models (a combination of deep-learning and machine learning models) and integrates their outputs following the ensemble learning paradigm.", + "DeepProg was applied on RNA-Seq, Methylation and miRNA data from 32 cancers in The Cancer Genome Atlas (TCGA),from NCBI, with a total of around 10,000 samples.", + "The results from the DeepProg method are compared to results from the Similarity Network Fusion (SNF) algorithm, used to identify cancer subtypes linked to survival by others. ", + "In all, DeepProg yields much better log-rank p values and C-indices than the SNF method. " + ], + "score": { + "usability_domain_length": 815 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.4", + "usability_domain": [ + "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. 
For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. We also ran the same script on assemblies in the ARGOS DB that were not in the original BioProject and then combined both results into one list. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 814 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.5", + "usability_domain": [ + "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. We also ran the same script on assemblies in the ARGOS DB that were not in the original BioProject and then combined both results into one list. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 814 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000438/0.1", + "usability_domain": [ + "For this data set, we pulled biomarker data from OpenTargets. From the raw TSV, the data was cleaned and extracted. 
Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." + ], + "score": { + "usability_domain_length": 795 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000438/0.2", + "usability_domain": [ + "For this data set, we pulled biomarker data from OpenTargets. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." 
+ ], + "score": { + "usability_domain_length": 795 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000499/v-2.0.2", + "usability_domain": [ + "The GlyGen-PubChem Protein Cross-references dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of PubChem Protein to GlyGen cross-references. In the dataset, the UniProtKB glycoprotein accessions are mapped to the RefSeq accessions and glycosylation annotation corresponding to glycoprotein have been added. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 792 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000499/v-2.1.1", + "usability_domain": [ + "The GlyGen-PubChem Protein Cross-references dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of PubChem Protein to GlyGen cross-references. In the dataset, the UniProtKB glycoprotein accessions are mapped to the RefSeq accessions and glycosylation annotation corresponding to glycoprotein have been added. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen. 
" + ], + "score": { + "usability_domain_length": 792 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000442/0.2", + "usability_domain": [ + "For this data set, we pulled biomarker data from MarkerDB. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." + ], + "score": { + "usability_domain_length": 792 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000442/0.3", + "usability_domain": [ + "For this data set, we pulled biomarker data from MarkerDB. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." 
+ ], + "score": { + "usability_domain_length": 792 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000443/0.2", + "usability_domain": [ + "For this data set, we pulled biomarker data from ClinVar. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." + ], + "score": { + "usability_domain_length": 791 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000443/0.3", + "usability_domain": [ + "For this data set, we pulled biomarker data from ClinVar. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." 
+ ], + "score": { + "usability_domain_length": 791 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000441/0.1", + "usability_domain": [ + "For this data set, we pulled biomarker data from GWAS. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." + ], + "score": { + "usability_domain_length": 788 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000441/0.2", + "usability_domain": [ + "For this data set, we pulled biomarker data from GWAS. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." 
+ ], + "score": { + "usability_domain_length": 788 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000379/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Virus Pathogen Resource contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." + ], + "score": { + "usability_domain_length": 780 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000378/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Virus Pathogen Resource (ViPR) contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." 
+ ], + "score": { + "usability_domain_length": 780 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000716/v-2.1.1", + "usability_domain": [ + "The Human O-Gluc Glycosylation Sites contains human (taxid:9606) O-Glucosylation sites from OGLUT2 and POGLUT3 O - glucosylate multiple EGF repeats in fibrillin - 1, -2, and LTBP1 and promote secretion of fibrillin - 1 [PMID:34411563] (experimental) and based on the consensus sequence the other sites (predicted) were detected using Prosite Prorule. The GlyTouCan ID for O-Gluc is G71142DF and the consensus sequence for O-gluc glycosylation is C3-x-N-T-x-G-S-(FY)-x-C4. The enzymes for o-glucosylation is POGLUT2 and POGLUT3. The dataset also contains Glycosylation stoichiometry: The relative amount (percentage) of a glycoform based on peptide signal intensity determined by mass spectrometry.", + "The dataset is generated in the lab of Robert S Haltiwanger by Daniel Williamson." + ], + "score": { + "usability_domain_length": 778 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000020/1.2", + "usability_domain": [ + "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", + "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." 
+ ], + "score": { + "usability_domain_length": 769 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000020/1.3", + "usability_domain": [ + "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", + "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." + ], + "score": { + "usability_domain_length": 769 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000020/1.5", + "usability_domain": [ + "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", + "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." 
+ ], + "score": { + "usability_domain_length": 769 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000028/v-2.0.2", + "usability_domain": [ + "The Gene Expression (Normal) dataset contains the normal expression information of human [taxid:9606] genes from BioXpress database. The GlyGen Interface shows the normal gene expression information for select UBERON ids only which are mapped to corresponding DOIDs and have gene expression in cancer information for those DOID. For more gene expression information of other UBERON IDs (tissues) for a given gene, please refer to BioXpress - https://hive.biochemistry.gwu.edu/bioxpress. BioXpress is a gene/miRNA expression and disease association database with expression levels mapped to genes or miRNAs. The current version of BioXpress contains only genes associated with cancer. If you are using this dataset please give proper attribution to GlyGen and BioXpress" + ], + "score": { + "usability_domain_length": 768 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000028/v-2.1.1", + "usability_domain": [ + "The Gene Expression (Normal) dataset contains the normal expression information of human [taxid:9606] genes from BioXpress database. The GlyGen Interface shows the normal gene expression information for select UBERON ids only which are mapped to corresponding DOIDs and have gene expression in cancer information for those DOID. For more gene expression information of other UBERON IDs (tissues) for a given gene, please refer to BioXpress - https://hive.biochemistry.gwu.edu/bioxpress. BioXpress is a gene/miRNA expression and disease association database with expression levels mapped to genes or miRNAs. The current version of BioXpress contains only genes associated with cancer. 
If you are using this dataset please give proper attribution to GlyGen and BioXpress" + ], + "score": { + "usability_domain_length": 768 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000084/1.0.36", + "usability_domain": [ + "Human [taxid:9606] cancer mutations from the Biomuta project and filtered for specific cancers presented on the OncoMX and Glygen front end web pages. The cancers included in this dataset are: Stomach Cancer (DOID:10534), Thyroid Cancer (DOID:1781), Esophageal Cancer (DOID:5041), Kidney Cancer (DOID:263), Lung Cancer (DOID:1324), Uterine Cancer (DOID:363), Urinary Bladder Cancer (DOID:11054), Prostate Cancer (DOID:10283), Colorectal Cancer (DOID:9256), Liver Cancer (DOID:3571), Cervical Cancer (DOID:4362), Breast Cancer (DOID:1612), Brain Cancer (DOID:1319), Hematologic Cancer (DOID:2531), Head and Neck Cancer (DOID:11934), Adrenal Gland Cancer (DOID:3953), Pancreatic Cancer (DOID:1793), Ovarian Cancer (DOID:2394), and Skin Cancer (DOID:4159)" + ], + "score": { + "usability_domain_length": 752 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000296/v-2.0.2", + "usability_domain": [ + " The dataset provides cross-references to ChEBI Id's for the associated glycan (GlyTouCan Accession). The mapping is produced using the direct download files(database_accession.tsv) from the ChEBI-EBI FTP (ftp://ftp.ebi.ac.uk/pub/databases/chebi/). The glycans are mapped/registered into the ChEBI database as follows: 1) GlyTouCan accession is mapped directly to ChEBI ID through PubChem CID (if the PubChem CID has a ChEBI cross-reference) 2) glycans with PubChem CID (which lack a ChEBI cross-reference) are integrated into ChEBI through applications like KNIME (https://europepmc.org/article/med/28757290) and ClassyFire (https://pubmed.ncbi.nlm.nih.gov/27867422/). 2) glycans without a PubChem CID are registered manually into the CHEBI database." 
+ ], + "score": { + "usability_domain_length": 751 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000296/v-2.1.1", + "usability_domain": [ + " The dataset provides cross-references to ChEBI Id's for the associated glycan (GlyTouCan Accession). The mapping is produced using the direct download files(database_accession.tsv) from the ChEBI-EBI FTP (ftp://ftp.ebi.ac.uk/pub/databases/chebi/). The glycans are mapped/registered into the ChEBI database as follows: 1) GlyTouCan accession is mapped directly to ChEBI ID through PubChem CID (if the PubChem CID has a ChEBI cross-reference) 2) glycans with PubChem CID (which lack a ChEBI cross-reference) are integrated into ChEBI through applications like KNIME (https://europepmc.org/article/med/28757290) and ClassyFire (https://pubmed.ncbi.nlm.nih.gov/27867422/). 2) glycans without a PubChem CID are registered manually into the CHEBI database." + ], + "score": { + "usability_domain_length": 751 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000079/1.0.36", + "usability_domain": [ + "A list of human (taxid:9606) biomarkers, for several high-level categories of cancer (DOID:162), retrieved from EDRN (Early Detection Research Network; https://edrn.nci.nih.gov/), FDA (U.S. Food and Drug Administration; https://www.fda.gov/), or scientific publications recorded in PubMed (https://pubmed.ncbi.nlm.nih.gov/). EDRN is an NCI collaboration dedicated to discovery of (early) cancer biomarkers. Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen, biomarker, and disease types; the biomarker entity and its measurement modality (e.g., increased expression); and cross reference (programmatic linkage or resource ID) to established data repositories, models, and codes. 
" + ], + "score": { + "usability_domain_length": 748 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000410/v-2.0.2", + "usability_domain": [ + "The GlyGen-UniProtKB Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of UniProtKB to GlyGen crossreferences. In the dataset, the UniProtKB glycoprotein accessions have corresponding glycosylation annotation present in the datasets. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 733 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000410/v-2.1.1", + "usability_domain": [ + "The GlyGen-UniProtKB Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of UniProtKB to GlyGen crossreferences. In the dataset, the UniProtKB glycoprotein accessions have corresponding glycosylation annotation present in the datasets. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 733 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000228/v-2.0.2", + "usability_domain": [ + "The Human Protein Signal Peptide dataset contains human [taxid:9606] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 731 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000228/v-2.1.1", + "usability_domain": [ + "The Human Protein Signal Peptide dataset contains human [taxid:9606] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 731 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000101/1.0.36", + "usability_domain": [ + "A405 is the cobas KRAS Mutation Test, for use with the cobas 4800 System, is a real-time PCR test for the detection of seven somatic mutations in codons 12 and 13 of the KRAS gene in DNA derived from formalin-fixed paraffin-embedded human colorectal cancer (CRC) tumor tissue. The test is intended to be used as an aid in the identification of CRC patients for whom treatment with Erbitux (cetuximab) or with Vectibix (panitumumab) may be indicated based on a no mutation detected result. Specimens are processed using the cobas DNA Sample Preparation Kit for manual sample preparation and the cobas z 480 analyzer for automated amplification and detection. [FTCID:P140023]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 730 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000456/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniprotKB Xref RefSeq contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." 
+ ], + "score": { + "usability_domain_length": 724 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000456/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniprotKB Xref RefSeq contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." + ], + "score": { + "usability_domain_length": 724 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_095544/1.0", + "usability_domain": [ + "Pipeline for investigating the infection process by parasitic nematodes, using H.bacteriophora as a model to identify infection genes. Reads from two RNA samples of H.bacteriophora infective juveniles, one treated with the insect Manduca sexta for 9 hours and one 0 hr untreated control, were collected, trimmed, and analyzed by mapping to the reference genome. Finally, mapped reads were assigned genomic features in order to provide summarized data of the coverage for the genomic features of interest. Data from this pipeline can also be analyzed further by expression analysis to identify differentially expressed genes. This example pipeline was created based on the work of Adnal et al. 
doi: 10.1186/s12864-016-3468-6" + ], + "score": { + "usability_domain_length": 723 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000455/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref RefSeq contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." + ], + "score": { + "usability_domain_length": 710 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000455/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref RefSeq contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." 
+ ], + "score": { + "usability_domain_length": 710 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000069/1.0", + "usability_domain": [ + "\"Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]\",\n \"Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\",\n \"Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\",\n \"GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20\"" + ], + "score": { + "usability_domain_length": 709 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000443/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Signal Peptide dataset contains sarscov2 [taxid:2697049] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." 
+ ], + "score": { + "usability_domain_length": 704 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000443/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Signal Peptide dataset contains sarscov2 [taxid:2697049] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 704 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.7", + "usability_domain": [ + "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", + "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." 
+ ], + "score": { + "usability_domain_length": 703 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.8", + "usability_domain": [ + "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", + "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 703 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.9", + "usability_domain": [ + "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", + "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." 
+ ], + "score": { + "usability_domain_length": 703 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000104/1.0.36", + "usability_domain": [ + "A0408 is a panel device indicated for: the THXID BRAF kit is an in vitro diagnostic device intended for the qualitative detection of the BRAF V600E and V600K mutations in DNA samples extracted from formalin-fixed paraffin embedded (ffpe) human melanoma tissue. the THXID BRAF KIT is a real-time PCR test on the abi 7500 fast dx system and is intended to be used as an aid in selecting melanoma patients whose tumors carry the BRAF v600e mutation for treatment with dabrafenib [tafinlar ] and as an aid in selecting melanoma patients whose tumors carry the BRAF v600e or v600k mutation for treatment with trametinib [mekinist]. [FTCID:P120014]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 699 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000442/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Signal Peptide dataset contains sarscov1 [taxid:694009] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." 
+ ], + "score": { + "usability_domain_length": 690 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000442/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Signal Peptide dataset contains sarscov1 [taxid:694009] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 690 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000458/1.0", + "usability_domain": [ + "Using ML to predict moonlighting DNA-binding proteins (mDBPs) from other DNA-binding proteins (oDBPS) to advance our understanding of multifunctional proteins", + "Made use of publicly available moonlighting databases to develop trainable models", + "Moonlighting have been taken from Moonprot 3.0, MultitaskProtDB-II and MoonDB 2.0 ", + "mDBPs can indeed be predicted from proposed feature sets with reasonable confidence.(feature sets:\n(i) single protein sequence and predicted binding site features, (ii) sequence-based evolutionary features, (iii) network features based on protein\u2013protein interactions, (iv) sequence-predicted secondary structural features and (v) global gene expression profiles.)" + ], + "score": { + "usability_domain_length": 685 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000127/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref RefSeq contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to RefSeq 
database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " + ], + "score": { + "usability_domain_length": 684 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000128/v-2.0.2", + "usability_domain": [ + "The Mouse UniprotKB Xref RefSeq contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " + ], + "score": { + "usability_domain_length": 684 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000127/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref RefSeq contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. 
The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " + ], + "score": { + "usability_domain_length": 684 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000128/v-2.1.1", + "usability_domain": [ + "The Mouse UniprotKB Xref RefSeq contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. 
" + ], + "score": { + "usability_domain_length": 684 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000062/1.0", + "usability_domain": [ + "- Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]\n- Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\n- Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\n- GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "score": { + "usability_domain_length": 682 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000653/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Signal Peptide dataset contains fruitfly [taxid:7227] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. Signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 682 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000653/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Signal Peptide dataset contains fruitfly [taxid:7227] signal peptide information for the UniProtKB canonical accessions. 
The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. Signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 682 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000059/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] n-glycans attached to serum proteins expressed by liver tissue for patients with cirrhosis, hepatocellular carcinoma, or transplant. The measured glycan data were generated in collaboration with the IMAT Glyco-typer project and mapped to UniProtKB accessions of serum glycoproteins, Uberon anatomical entity IDs, glycan composition (m/z value-based), and GlyTouCan IDs. The dataset displays normalized intensity values (quantities) of specific n-glycans detected by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for specific serum proteins in hepatocellular carcinoma, cirrhosis, and liver transplant patient cohorts." + ], + "score": { + "usability_domain_length": 680 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000693/v-2.0.2", + "usability_domain": [ + "The dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Gene and Drug Landing Page Aggregator resource. If you use this dataset please provide proper attribution to Gene and Drug Landing Page Aggregator and GlyGen", + "Gene and Drug Landing Page Aggregator (GDLPA) has links to 53 gene, 18 variant and 19 drug repositories that provide direct links to gene and drug landing pages. 
You can search by gene or drug name and then choose the sites that contain knowledge about your gene or drug of interest. Resources supported by the NIH Common Fund are listed first and have the CFDE logo at their top right corner - https://cfde-gene-pages.cloud/." + ], + "score": { + "usability_domain_length": 674 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000693/v-2.1.1", + "usability_domain": [ + "The dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Gene and Drug Landing Page Aggregator resource. If you use this dataset please provide proper attribution to Gene and Drug Landing Page Aggregator and GlyGen", + "Gene and Drug Landing Page Aggregator (GDLPA) has links to 53 gene, 18 variant and 19 drug repositories that provide direct links to gene and drug landing pages. You can search by gene or drug name and then choose the sites that contain knowledge about your gene or drug of interest. Resources supported by the NIH Common Fund are listed first and have the CFDE logo at their top right corner - https://cfde-gene-pages.cloud/." + ], + "score": { + "usability_domain_length": 674 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000335/v-2.0.2", + "usability_domain": [ + "The HCV1a [TaxID:1108] glycosylations sites dataset contains glycosylation sites experimentally reported on Hepatitis C Virus 1a protein. The data was extracted from publication PMID:18187336. (Iacob RE, Perdivara I, Przybylski M, Tomer KB. Mass spectrometric characterization of glycosylation of hepatitis C virus E2 envelope glycoprotein reveals extended microheterogeneity of N-glycans. J Am Soc Mass Spectrom. 2008;19(3):428\u2013444. doi:10.1016/j.jasms.2007.11.022). The glycan compositions from publication [PMID:18187336] were assigned to GlyTouCan accessions by UniCarbKB. 
[https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]" + ], + "score": { + "usability_domain_length": 671 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.6", + "usability_domain": [ + "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 671 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000335/v-2.1.1", + "usability_domain": [ + "The HCV1a [TaxID:1108] glycosylations sites dataset contains glycosylation sites experimentally reported on Hepatitis C Virus 1a protein. The data was extracted from publication PMID:18187336. (Iacob RE, Perdivara I, Przybylski M, Tomer KB. Mass spectrometric characterization of glycosylation of hepatitis C virus E2 envelope glycoprotein reveals extended microheterogeneity of N-glycans. J Am Soc Mass Spectrom. 2008;19(3):428\u2013444. doi:10.1016/j.jasms.2007.11.022). The glycan compositions from publication [PMID:18187336] were assigned to GlyTouCan accessions by UniCarbKB. 
[https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]" + ], + "score": { + "usability_domain_length": 671 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.7", + "usability_domain": [ + "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 670 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.9", + "usability_domain": [ + "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 670 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000625/v-2.1.1", + "usability_domain": [ + "This is a Human Protein Biomarkers dataset. 
It contains human [taxonomy:9606] protein biomarker information, including cross-mapped UniProtKB or Protein Ontology accessions, assessed biomarker entity, biomarker status, biomarker type, specimen type, LOINC code, disease name, and source evidence from the cancer biomarker portal (https://data.oncomx.org/cancerbiomarkers). This dataset can be used to obtain biomarker information for 19 cancer types and COVID-19. If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite: PMID:32142370, PMID:34015823, and PMID:31616925. The dataset was earlier called as human_protein_cancer_biomarkers.csv" + ], + "score": { + "usability_domain_length": 669 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.3", + "usability_domain": [ + "BioSample metadata extracted from the original FDA BioProject in NCBI. ", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific biosamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 664 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000038/1.3", + "usability_domain": [ + "Assembly quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", + "\nFor this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all assembly IDs from the selected BioProject (PRJNA231221). 
Once we had the list of assembly ids, we constructed a CURL command to download each of the assembly XML files containing the assembly quality control data. The resulting file contains quality control data for all assemblies in the BioProject (PRJNA231221). The primary use case for this data set is to provide a file that contains quality control data for all assemblies in the selected BioProject." + ], + "score": { + "usability_domain_length": 663 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000038/1.4", + "usability_domain": [ + "Assembly quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", + "\nFor this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all assembly IDs from the selected BioProject (PRJNA231221). Once we had the list of assembly ids, we constructed a CURL command to download each of the assembly XML files containing the assembly quality control data. The resulting file contains quality control data for all assemblies in the BioProject (PRJNA231221). The primary use case for this data set is to provide a file that contains quality control data for all assemblies in the selected BioProject." + ], + "score": { + "usability_domain_length": 663 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000457/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Signal Peptide dataset contains mouse [taxid:10090] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 662 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000628/v-2.0.2", + "usability_domain": [ + "This is a SARS-CoV1 Glycosylation Sites (UniCarbKB) dataset. It contains a list of SARS coronavirus (SARS-CoV-1) [taxonomy:694009] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB. The listed proteins (UniProtKB accession) are part of the GlyGen UniProtKB SARS CoV1 Proteome Masterlist (https://data.glygen.org/GLY_000467). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:24234447, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining a list of glycosylation sites for canonical glycoproteins from the UnicarbKB database." + ], + "score": { + "usability_domain_length": 662 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000457/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Signal Peptide dataset contains mouse [taxid:10090] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 662 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000628/v-2.1.1", + "usability_domain": [ + "This is a SARS-CoV1 Glycosylation Sites (UniCarbKB) dataset. 
It contains a list of SARS coronavirus (SARS-CoV-1) [taxonomy:694009] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB. The listed proteins (UniProtKB accession) are part of the GlyGen UniProtKB SARS CoV1 Proteome Masterlist (https://data.glygen.org/GLY_000467). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:24234447, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining a list of glycosylation sites for canonical glycoproteins from the UnicarbKB database." + ], + "score": { + "usability_domain_length": 662 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000458/v-2.0.2", + "usability_domain": [ + "The Rat Protein Signal Peptide dataset contains rat [taxid:10116] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 658 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000458/v-2.1.1", + "usability_domain": [ + "The Rat Protein Signal Peptide dataset contains rat [taxid:10116] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. 
The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 658 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000284/v-2.0.2", + "usability_domain": [ + "The dataset provides information on biosynthetic enzymes (UniProtKB Ac.) associated to the glycan structures (GlyTouCan Accessions). The enzymes listed are human or mouse proteins that are associated with the canonical monosaccharides. The association of a glycan structure to the canonical monosaccharides are based on the Glyco tree model. The glycans listed in the database may or may not have a human (TaxID:9606) or mouse (TaxID:10090) species annotation from the source (GlyTouCan or UniCarbKB) database. Please note that a human or mouse biosynthetic enzyme association should not be considered as a human or mouse species annotation for that glycan." + ], + "score": { + "usability_domain_length": 657 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000284/v-2.1.1", + "usability_domain": [ + "The dataset provides information on biosynthetic enzymes (UniProtKB Ac.) associated to the glycan structures (GlyTouCan Accessions). The enzymes listed are human or mouse proteins that are associated with the canonical monosaccharides. The association of a glycan structure to the canonical monosaccharides are based on the Glyco tree model. The glycans listed in the database may or may not have a human (TaxID:9606) or mouse (TaxID:10090) species annotation from the source (GlyTouCan or UniCarbKB) database. Please note that a human or mouse biosynthetic enzyme association should not be considered as a human or mouse species annotation for that glycan." 
+ ], + "score": { + "usability_domain_length": 657 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_013112/0.1", + "usability_domain": [ + "This pipeline enables the complete identification and sequencing of all coding/non-coding RNA species in human genome in one sequencing run.", + "The data used to validate this pipeline are simulated. Text link to simulated dataset is included input field.", + "Final output Fastq files were checked for anti-sense reads using custom python scripts (not released)", + "the original pipeline can be accessed https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-S7-S10/figures/1", + "Gene annotation parameters available at http://bmcbioinformatics.biomedcentral.com/articles/101.1186/1471-2105-14-S7-S10", + "Protocol patent pending RM2010A000293-PCT/IB2011/052369" + ], + "score": { + "usability_domain_length": 654 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_013112/1.0", + "usability_domain": [ + "This pipeline enables the complete identification and sequencing of all coding/non-coding RNA species in human genome in one sequencing run.", + "The data used to validate this pipeline are simulated. 
Text link to simulated dataset is included input field.", + "Final output Fastq files were checked for anti-sense reads using custom python scripts (not released)", + "the original pipeline can be accessed https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-S7-S10/figures/1", + "Gene annotation parameters available at http://bmcbioinformatics.biomedcentral.com/articles/101.1186/1471-2105-14-S7-S10", + "Protocol patent pending RM2010A000293-PCT/IB2011/052369" + ], + "score": { + "usability_domain_length": 654 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000038/1.2", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", + "For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 652 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000225/v-2.0.2", + "usability_domain": [ + "The Human Diseases dataset contains list of human [taxid:9606] diseases as described by OMIM and Monarch Initiative database. ", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship. 
", + "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest. ", + "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, EMBL-EBI-UniProt and GlyGen. " + ], + "score": { + "usability_domain_length": 651 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000225/v-2.1.1", + "usability_domain": [ + "The Human Diseases dataset contains list of human [taxid:9606] diseases as described by OMIM and Monarch Initiative database. ", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship. ", + "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest. ", + "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, EMBL-EBI-UniProt and GlyGen. " + ], + "score": { + "usability_domain_length": 651 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000109/1.0.36", + "usability_domain": [ + "The therascreen PIK3CA RGQ PCR Kit is a real-time qualitative PCR test for the detection of 11 mutations in the PIK3CA gene (Exon 7: C420R; Exon 9: E542K, E545A, E545D [1635G>T only], E545G, E545K, Q546E, Q546R; and Exon 20: H1047L, H1047R, H1047Y) using genomic DNA (gDNA) extracted from formalin-fixed, paraffin-embedded (FFPE) breast tumor tissue or circulating tumor DNA (ctDNA) from plasma derived from K2EDTA anticoagulated peripheral whole blood ... 
The test is intended to aid clinicians in identifying breast cancer patients who may be eligible for treatment with PIQRAY (alpelisib) based on a PIK3CA Mutation Detected result.FTCID:P190001" + ], + "score": { + "usability_domain_length": 648 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000009/1.6", + "usability_domain": [ + "Quality control metrics extracted from NCBI's SRA run data.", + "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 647 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000459/1.2.1", + "usability_domain": [ + "nfcore/chipseq is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data.", + "For use with multiple replicates, the group identifier should be identical when you have multiple replicates from the same experimental group, just increment the replicate identifier appropriately. The first replicate value for any given experimental group must be 1.", + "Both the group and replicate identifiers should be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. 
" + ], + "score": { + "usability_domain_length": 646 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000465/1.2.1", + "usability_domain": [ + "nfcore/chipseq is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data.", + "For use with multiple replicates, the group identifier should be identical when you have multiple replicates from the same experimental group, just increment the replicate identifier appropriately. The first replicate value for any given experimental group must be 1.", + "Both the group and replicate identifiers should be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. " + ], + "score": { + "usability_domain_length": 646 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000038/1.1", + "usability_domain": [ + "Quality control data extracted from NCBI's SRA for the FDA ARGOS BioProject", + "For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." + ], + "score": { + "usability_domain_length": 643 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000701/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniprotKB Xref RefSeq contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. 
The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " + ], + "score": { + "usability_domain_length": 635 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000701/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniprotKB Xref RefSeq contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " + ], + "score": { + "usability_domain_length": 635 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000092/1.0.36", + "usability_domain": [ + "A0396 is a panel of identified gene predictive biomarkers (mutations) in lung cancer (DOID:1324). 
The genes are ALK (UPKB:Q9UM73), CDK4 (UPKB:P11802), DDR2 (UPKB:Q16832), MAP2K1 (UPKB:Q02750), MAP2K2 (UPKB:Q02750), EGFR (UPKB:P00533), FGFR2 (UPKB:P21802), FGFR3 (UPKB:P22607), HRAS (UPKB:P01112), KRAS (UPKB:P01116), NRAS (UPKB:P01111), MET (UPKB:P08581), KIT (UPKB:P10721), PIK3CA (UPKB:P42336), PGFRA (UPKB:P16234), RET (UPKB:P07949), ROS1 (UPKB:P08922), ATK1 (UPKB:P31749), RAF1 (UPKB:P04049), ERBB2 (UPKB:P04626), ERBB3 (UPKB:P21860), BRAF (UPKB:P15056), MTOR (UPKB:P42345). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 634 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000378/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Virus Pathogen Resource (ViPR) contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to ViPR database accessions/identifiers.", + "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." + ], + "score": { + "usability_domain_length": 634 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000379/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Virus Pathogen Resource contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to ViPR database accessions/identifiers.", + "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. 
National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." + ], + "score": { + "usability_domain_length": 634 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000308/v-2.0.2", + "usability_domain": [ + "The dataset provides Genes involved in the Congenital disorders of glycosylation from Genomics England PanelApp ( PanelApp crowdsources expert knowledge to establish consensus diagnostic gene panels. Antonio Rueda Martin and Eleanor Williams, Rebecca E. Foulger, Sarah Leigh, Louise C. Daugherty, Olivia Niblock, Ivone U. S. Leong, Katherine R. Smith, Oleg Gerasimenko, Eik Haraldsdottir, Ellen Thomas, Richard H. Scott, Emma Baple, Arianna Tucci, Helen Brittain, Anna de Burca, Kristina Iba\u00f1ez, Dalia Kasperaviciute, Damian Smedley, Mark Caulfield, Augusto Rendon & Ellen M. McDonagh. Nat Genet (2019) doi:10.1038/s41588-019-0528-2)" + ], + "score": { + "usability_domain_length": 633 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000308/v-2.1.1", + "usability_domain": [ + "The dataset provides Genes involved in the Congenital disorders of glycosylation from Genomics England PanelApp ( PanelApp crowdsources expert knowledge to establish consensus diagnostic gene panels. Antonio Rueda Martin and Eleanor Williams, Rebecca E. Foulger, Sarah Leigh, Louise C. Daugherty, Olivia Niblock, Ivone U. S. Leong, Katherine R. Smith, Oleg Gerasimenko, Eik Haraldsdottir, Ellen Thomas, Richard H. Scott, Emma Baple, Arianna Tucci, Helen Brittain, Anna de Burca, Kristina Iba\u00f1ez, Dalia Kasperaviciute, Damian Smedley, Mark Caulfield, Augusto Rendon & Ellen M. McDonagh. 
Nat Genet (2019) doi:10.1038/s41588-019-0528-2)" + ], + "score": { + "usability_domain_length": 633 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000092/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref BioMuta contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BioMuta database accessions/identifiers. BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates. The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 632 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000092/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref BioMuta contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BioMuta database accessions/identifiers. BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates. The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 632 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_018406/1.0", + "usability_domain": [ + "Next generation sequencing data is used to characterize the genetic background of genetically engineered mice through the use of variant identification. 
This pipeline follows the anaylsis of publically available RNA sequencing data. The process will begin with alignement and complete at the point where KO-linked variants are identified. Farkas, C., Fuentes-Villalobos, F., Rebolledo-Jaramillo, B. et al. Streamlined computational pipeline for genetic background characterization of genetically engineered mice based on next generation sequencing data. BMC Genomics 20, 131 (2019). https://doi.org/10.1186/s12864-019-5504-9" + ], + "score": { + "usability_domain_length": 625 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_067092/1.0", + "usability_domain": [ + "Pipeline for identifying copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods. Approximately 25 individuals were randomly chosen from each of the CEU, YRI, CHB, JPT, MXL, CLM, PUR, ASW, LWK, CHS, TSI, IBS, FIN, and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" + ], + "score": { + "usability_domain_length": 621 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000049/1.0", + "usability_domain": [ + "Pipeline for identifying copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods. Approximately 25 individuals were randomly chosen from each of the CEU, YRI, CHB, JPT, MXL, CLM, PUR, ASW, LWK, CHS, TSI, IBS, FIN, and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. This example pipeline was created based on the work of Astling et al. 
doi: 10.1186/s12864-017-3976-z" + ], + "score": { + "usability_domain_length": 621 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000369/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref IntAct dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." + ], + "score": { + "usability_domain_length": 619 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000369/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref IntAct dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." + ], + "score": { + "usability_domain_length": 619 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000355/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Citations dataset contains publication information for hcv1a [taxid:11108] UniProtKB accessions that includes PMID, journal name, date and author and title. 
Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/hcv1a_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 616 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000355/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Citations dataset contains publication information for hcv1a [taxid:11108] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/hcv1a_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 616 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.0", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." 
+ ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.1", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.2", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.3", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. 
Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000368/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref IntAct dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000630/v-2.0.2", + "usability_domain": [ + "The Fruitfly Glycosylation Sites (GlyConnect) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported glycans in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Drosophila melanogaster). If you use this dataset please provide proper attribution to GlyConnect and GlyGen. 
Please note that this dataset does not contain protein or sites information just the glycans" + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.4", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000368/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref IntAct dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." 
+ ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000630/v-2.1.1", + "usability_domain": [ + "The Fruitfly Glycosylation Sites (GlyConnect) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported glycans in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Drosophila melanogaster). If you use this dataset please provide proper attribution to GlyConnect and GlyGen. Please note that this dataset does not contain protein or sites information just the glycans" + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000011/1.5", + "usability_domain": [ + "Positional QC process.", + "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." + ], + "score": { + "usability_domain_length": 612 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000038/1.0", + "usability_domain": [ + "This algorithm was originally developed at the Center for Biologics Evaluation and Research to assist in safety prediction of aluminum containing infant vaccines. A full description of the algorithm is published in the journal \u201cvaccine\u201d (Mitkus et al. 2011). 
The algorithm is capable of creating aluminum pharmacokinetic profiles for infant following recommended vaccination schedule by the Advisory Committee on Immunization Practices of the Centers for Disease Control and Prevention. Also it predicts aluminum safety threshold based on the level set by the Agency for Toxic Substances and Disease Registry." + ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.0", + "usability_domain": [ + "BioSample metadata extracted from SRA using the HIVE Lab workflow.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.1", + "usability_domain": [ + "BioSample metadata extracted from SRA using the HIVE Lab workflow.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." 
+ ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000101/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref GeneCards contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to GeneCards database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " GeneCards is a searchable, integrative database that provides comprehensive, user-friendly information on all annotated and predicted human genes. It automatically integrates gene-centric data from ~150 web sources, including genomic, transcriptomic, proteomic, genetic, clinical and functional information." + ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000091/1.0.36", + "usability_domain": [ + "A0395 Gene expression levels (measured by the Ct value determined when the fluorescent signal exceeds a pre-defined threshold limit. If the external controls are valid, then the Ct value for each gene marker in the patient sample is compared to marker-specific Ct cutoff values. Samples with Ct values less than or equal to one or both of the cutoff values for MG or CKi9 are considered positive. The Cutoff Ct values are as follows: MG < 31, CK19 < 30, Internal Control < 36.) for breast cancer metastasis. The genes are MG (NA) and KRT19 (UPKB:P08727). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000101/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref GeneCards contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to GeneCards database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " GeneCards is a searchable, integrative database that provides comprehensive, user-friendly information on all annotated and predicted human genes. It automatically integrates gene-centric data from ~150 web sources, including genomic, transcriptomic, proteomic, genetic, clinical and functional information." + ], + "score": { + "usability_domain_length": 610 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_022530/2.2", + "usability_domain": [ + "Identify treatment emergent amino acid substitutions[so:0000048] that correlate with antiviral drug treatment failure", + "This biocompute object is the BCO proof of concept for FDA data submission process. BCO is intended to facilitate data analysis and communication during FDA data submission. We have generated mock HCV patient data to mimic a real clinical trial FDA submission and confirm if BCO facilitates the submission process. BCO elucidates potential dicordant results between prirmary data analyis and FDA data re-analysis", + "Clinical trial description can be found at: URI: https://clinicaltrials.gov/" + ], + "score": { + "usability_domain_length": 605 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000498/v-2.0.2", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for human [taxid:9606] proteins (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB human glycoprotein dataset (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019). 
" + ], + "score": { + "usability_domain_length": 605 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000498/v-2.1.1", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for human [taxid:9606] proteins (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB human glycoprotein dataset (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019). " + ], + "score": { + "usability_domain_length": 605 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000536/v-2.0.2", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for mouse [taxid:10090] proteins (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. 
Please note that this dataset refers to the previous version of the UniCarbKB mouse glycoprotein dataset (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)" + ], + "score": { + "usability_domain_length": 604 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000536/v-2.1.1", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for mouse [taxid:10090] proteins (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB mouse glycoprotein dataset (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)" + ], + "score": { + "usability_domain_length": 604 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000448/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref IntAct dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." 
+ ], + "score": { + "usability_domain_length": 603 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000448/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref IntAct dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." + ], + "score": { + "usability_domain_length": 603 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000276/v-2.0.2", + "usability_domain": [ + "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." + ], + "score": { + "usability_domain_length": 601 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000276/v-2.1.1", + "usability_domain": [ + "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. 
The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." + ], + "score": { + "usability_domain_length": 601 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000537/v-2.0.2", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for rat [taxid:10116] proteins (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB rat glycoprotein dataset (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)" + ], + "score": { + "usability_domain_length": 600 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000537/v-2.1.1", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for rat [taxid:10116] proteins (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. 
Please note that this dataset refers to the previous version of the UniCarbKB rat glycoprotein dataset (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)" + ], + "score": { + "usability_domain_length": 600 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000541/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 596 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000541/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 596 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000010/1.2", + "usability_domain": [ + "BioSample metadata extracted from SRA.", + "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. 
For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." + ], + "score": { + "usability_domain_length": 582 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000447/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref NCBI Gene dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 582 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000447/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref NCBI Gene dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." 
+ ], + "score": { + "usability_domain_length": 582 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000036/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Citations dataset contains publication information for mouse [taxid:10090] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/mouse_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 581 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000103/1.0.36", + "usability_domain": [ + "A0407 is a TOP2A FISH pharmDx Kit is designed to detect amplifications and deletions (copy number changes) of the TOP2A gene using fluorescence in situ hybridization (FISH) technique on formalin- fixed, paraffin-embedded human breast cancer tissue specimens. Deletions and amplifications of the TOP2A gene serve as a marker for poor prognosis in high-risk breast cancer patients. Results from the TOP2A FISH pharmDx MT Kit are intended for use as an adjunct to existing clinical and pathological information. [FTCID:P050045]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 581 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000036/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Citations dataset contains publication information for mouse [taxid:10090] UniProtKB accessions that includes PMID, journal name, date and author and title. 
Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/mouse_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 581 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000035/v-2.0.2", + "usability_domain": [ + "The Human Protein Citations dataset contains publication information for human [taxid:9606] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/human_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 578 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000233/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref IntAct dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. 
" + ], + "score": { + "usability_domain_length": 578 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000035/v-2.1.1", + "usability_domain": [ + "The Human Protein Citations dataset contains publication information for human [taxid:9606] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/human_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 578 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000233/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref IntAct dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " + ], + "score": { + "usability_domain_length": 578 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000236/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref IntAct dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " + ], + "score": { + "usability_domain_length": 577 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000253/v-2.0.2", + "usability_domain": [ + "The Rat Protein Citations dataset contains publication information for rat [taxid:10116] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/rat_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 577 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000236/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref IntAct dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. 
" + ], + "score": { + "usability_domain_length": 577 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000253/v-2.1.1", + "usability_domain": [ + "The Rat Protein Citations dataset contains publication information for rat [taxid:10116] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/rat_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 577 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000263/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref IntAct dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " + ], + "score": { + "usability_domain_length": 574 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000263/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref IntAct dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " + ], + "score": { + "usability_domain_length": 574 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000530/v-2.0.2", + "usability_domain": [ + "The SARS-CoV2 Glycosylation Sites (GlyConnect) dataset contains SARS-CoV2 [taxid:2697049] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Severe acute respiratory syndrome coronavirus 2 (2019-nCoV)). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." + ], + "score": { + "usability_domain_length": 572 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000530/v-2.1.1", + "usability_domain": [ + "The SARS-CoV2 Glycosylation Sites (GlyConnect) dataset contains SARS-CoV2 [taxid:2697049] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Severe acute respiratory syndrome coronavirus 2 (2019-nCoV)). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." 
+ ], + "score": { + "usability_domain_length": 572 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000028/1.1", + "usability_domain": [ + "BioSample metadata is manually extracted from NCBI's BioSample database.", + "This dataset is one of the four 'core' tables produced by the Crandall Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples." + ], + "score": { + "usability_domain_length": 570 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000028/1.0", + "usability_domain": [ + "BioSample metadata is manually extracted from NCBI's BioSample database.", + "This dataset is one of the four 'core' tables produced by the Crandall Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" + ], + "score": { + "usability_domain_length": 569 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000020/1.0", + "usability_domain": [ + "BioSample metadata is manually extracted from NCBI's BioSample database.", + "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). 
The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" + ], + "score": { + "usability_domain_length": 565 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000020/1.1", + "usability_domain": [ + "BioSample metadata is manually extracted from NCBI's BioSample database.", + "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" + ], + "score": { + "usability_domain_length": 565 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000583/v-2.0.2", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 564 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000625/v-2.0.2", + "usability_domain": [ + "This is a Human Protein Cancer Biomarkers dataset. 
It contains human [taxonomy:9606] cancer biomarker information including biomarker main x-ref mapped to UniProtKB, assessed biomarker entity, BEST biomarker type, literature evidence, disease name. and LOINC code from the cancer biomarker portal (https://data.oncomx.org/cancerbiomarkers). If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite:PMID:32142370, PMID:34015823, and PMID:31616925. This dataset can be used for obtaining cancer biomarker information for 19 cancer types." + ], + "score": { + "usability_domain_length": 564 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000583/v-2.1.1", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 564 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000582/v-2.0.2", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. 
The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 563 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000582/v-2.1.1", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 563 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000584/v-2.0.2", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 562 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000386/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniprotKB Xref BRENDA contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 562 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000386/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniprotKB Xref BRENDA contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 562 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000584/v-2.1.1", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. 
The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 562 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000371/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref InterPro contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 556 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000396/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for mouse [taxid:10090] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 556 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000371/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref InterPro contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 556 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000396/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for mouse [taxid:10090] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 556 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000395/v-2.0.2", + "usability_domain": [ + "The Human Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for human [taxid:9606] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 555 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000395/v-2.1.1", + "usability_domain": [ + "The Human Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for human [taxid:9606] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 555 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_046954/1.0", + "usability_domain": [ + "Pipeline for identifying expression counts from three maize genome assemblies and corresponding annotations, which are B73, PH207 and W22 .Iin each dataset, the tissue samples were chosen to broadly capture variation. Two biological replicates per genotype/tissue combination and standard, non-stranded RNAseq libraries were prepared and sequenced on Illumina HiSeq 2500, using 50\u2009bp SE reads, avg. number of reads\u2009=\u200930.5 million. file:///dev/tmpfs was used for the file IOs since some steps were ran on the command line. doi: 10.1186/s12864-020-6696-8." + ], + "score": { + "usability_domain_length": 554 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000450/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref InterPro contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 554 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000450/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref InterPro contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 554 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000397/v-2.0.2", + "usability_domain": [ + "The Rat Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for rat [taxid:10116] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot.. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 553 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000104/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref NCBI Gene dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 553 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000104/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref NCBI Gene dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 553 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000397/v-2.1.1", + "usability_domain": [ + "The Rat Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for rat [taxid:10116] UniProtKB/TrEMBL proteins accessions. 
In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot.. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 553 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000105/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref NCBI Gene dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " + ], + "score": { + "usability_domain_length": 552 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000105/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref NCBI Gene dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. 
" + ], + "score": { + "usability_domain_length": 552 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000272/2.9", + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus" + ], + "score": { + "usability_domain_length": 551 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000370/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref InterPro contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 549 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000370/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref InterPro contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 549 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000032/1.0", + "usability_domain": [ + "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. ", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 548 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000144/1.0", + "usability_domain": [ + "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. ", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 548 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000032/1.2", + "usability_domain": [ + "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. 
", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 548 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000238/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref NCBI Gene dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " + ], + "score": { + "usability_domain_length": 548 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000238/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref NCBI Gene dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. 
" + ], + "score": { + "usability_domain_length": 548 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000449/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref InterPro contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 540 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000449/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref InterPro contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 540 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000102/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref HGNC dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to HGNC accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
", + "The HUGO Gene Nomenclature Committee is a committee of the Human Genome Organization that sets the standards for human gene nomenclature. The HGNC approves a unique and meaningful name for every known human gene, based on a query of experts. " + ], + "score": { + "usability_domain_length": 535 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000102/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref HGNC dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to HGNC accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The HUGO Gene Nomenclature Committee is a committee of the Human Genome Organization that sets the standards for human gene nomenclature. The HGNC approves a unique and meaningful name for every known human gene, based on a query of experts. " + ], + "score": { + "usability_domain_length": 535 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000637/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Citations dataset contains publication information for fruitfly [taxid:7227] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/fruitfly_protein_blacklisted_pmids_uniprotkb.csv. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 534 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000637/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Citations dataset contains publication information for fruitfly [taxid:7227] UniProtKB accessions that includes PMID, journal name, date and author and title. 
Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/fruitfly_protein_blacklisted_pmids_uniprotkb.csv. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 534 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000663/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref IntAct dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " + ], + "score": { + "usability_domain_length": 529 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000663/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref IntAct dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. 
" + ], + "score": { + "usability_domain_length": 529 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000094/v-2.0.2", + "usability_domain": [ + "The Mouse UniprotKB Xref BRENDA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 526 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000094/v-2.1.1", + "usability_domain": [ + "The Mouse UniprotKB Xref BRENDA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 526 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000093/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref BRENDA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. 
It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 524 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000093/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref BRENDA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 524 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000016/1.0.25", + "usability_domain": [ + "Comprehensive cancer driver mutations table. The file human_cancer_driver_muts.csv contains a list of gene symbols and ensembl transcript ids mapped to cancer driver mutations predicted by three structural analysis level computational tools. The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations.The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations." + ], + "score": { + "usability_domain_length": 522 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000056/1.0.36", + "usability_domain": [ + "Comprehensive cancer driver mutations table. The file human_cancer_driver_muts.csv contains a list of gene symbols and ensembl transcript ids mapped to cancer driver mutations predicted by three structural analysis level computational tools. 
The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations.The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations." + ], + "score": { + "usability_domain_length": 522 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000270/v-2.0.2", + "usability_domain": [ + "The Rat UniprotKB Xref BRENDA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 521 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000019/1.2", + "usability_domain": [ + "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. ", + "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. " + ], + "score": { + "usability_domain_length": 521 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000019/1.3", + "usability_domain": [ + "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. 
", + "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. " + ], + "score": { + "usability_domain_length": 521 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000270/v-2.1.1", + "usability_domain": [ + "The Rat UniprotKB Xref BRENDA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 521 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000019/1.4", + "usability_domain": [ + "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. ", + "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. 
" + ], + "score": { + "usability_domain_length": 521 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000090/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref Bgee dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 519 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000090/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref Bgee dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 519 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000004/1.3", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. 
The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure, map to gene sequences, and more.." + ], + "score": { + "usability_domain_length": 518 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_022531/2.2", + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]. Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure. Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus." + ], + "score": { + "usability_domain_length": 516 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000331/v-2.0.2", + "usability_domain": [ + "The Rat Glycosylation Sites (GlyConnect) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Rattus Norvegicus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." + ], + "score": { + "usability_domain_length": 516 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000266/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref Bgee dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + " Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 516 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000266/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref Bgee dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + " Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 516 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000331/v-2.1.1", + "usability_domain": [ + "The Rat Glycosylation Sites (GlyConnect) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Rattus Norvegicus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." 
+ ], + "score": { + "usability_domain_length": 516 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000004/1.0", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000004/1.1", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000004/1.2", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. 
The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000118/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref PANTHER contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000107/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref InterPro contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. 
" + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000330/v-2.0.2", + "usability_domain": [ + "The Mouse Glycosylation Sites (GlyConnect) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Mus musculus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000107/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref InterPro contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000118/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref PANTHER contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000330/v-2.1.1", + "usability_domain": [ + "The Mouse Glycosylation Sites (GlyConnect) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Mus musculus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." + ], + "score": { + "usability_domain_length": 515 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000329/v-2.0.2", + "usability_domain": [ + "The Human Glycosylation Sites (GlyConnect) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Homo sapiens). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." 
+ ], + "score": { + "usability_domain_length": 514 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000329/v-2.1.1", + "usability_domain": [ + "The Human Glycosylation Sites (GlyConnect) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Homo sapiens). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." + ], + "score": { + "usability_domain_length": 514 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000232/v-2.0.2", + "usability_domain": [ + "The Rat UniprotKB Xref CAZy contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates. " + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000095/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref CAZy contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). 
The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000096/v-2.0.2", + "usability_domain": [ + "The Mouse UniprotKB Xref CAZy contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates" + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000095/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref CAZy contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000096/v-2.1.1", + "usability_domain": [ + "The Mouse UniprotKB Xref CAZy contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates" + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000232/v-2.1.1", + "usability_domain": [ + "The Rat UniprotKB Xref CAZy contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates. " + ], + "score": { + "usability_domain_length": 513 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000100/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448; Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 512 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000243/v-2.0.2", + "usability_domain": [ + "The Rat UniprotKB Xref InterPro contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. 
The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " + ], + "score": { + "usability_domain_length": 512 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000261/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref PANTHER contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " + ], + "score": { + "usability_domain_length": 512 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000243/v-2.1.1", + "usability_domain": [ + "The Rat UniprotKB Xref InterPro contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. 
" + ], + "score": { + "usability_domain_length": 512 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000261/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref PANTHER contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " + ], + "score": { + "usability_domain_length": 512 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000117/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref PANTHER contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000106/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref InterPro contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000139/v-2.0.2", + "usability_domain": [ + "The Mouse Proteoform Citations (UniCarbKB) contains mouse [taxid:10090] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. . The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000106/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref InterPro contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000117/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref PANTHER contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. 
The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000139/v-2.1.1", + "usability_domain": [ + "The Mouse Proteoform Citations (UniCarbKB) contains mouse [taxid:10090] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. . The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 511 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000091/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref Bgee contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets" + ], + "score": { + "usability_domain_length": 510 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000091/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref Bgee contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets" + ], + "score": { + "usability_domain_length": 510 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000138/v-2.0.2", + "usability_domain": [ + "The Human Proteoform Citations (UniCarbKB) contains human [taxid:9606] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 508 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000138/v-2.1.1", + "usability_domain": [ + "The Human Proteoform Citations (UniCarbKB) contains human [taxid:9606] publication information of glycoproteins from UniCarbKB. 
The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 508 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000707/v-2.1.1", + "usability_domain": [ + "This dataset indicates the tool support status of each GlyTouCan accession. The status is required for the GlyCan Detail API. The tools are sandbox, gnome, pdb, gnome_glygen, gnome_glygen_nglycans, gnome_glygen_oglycans, gnome_glycotree_nglycans, and gnome_glycotree_oglycans. PDB support for glycam 3D structures was provided by Rob Woods (Glycam/UGA). Sandbox support was provided by Will York (UGA) and Nathan Edwards (Georgetown). All other tool support data was provided by Nathan Edwards (Georgetown)." + ], + "score": { + "usability_domain_length": 507 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000655/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for fruitfly [taxid:7227] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 506 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000655/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for fruitfly [taxid:7227] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 506 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000251/v-2.0.2", + "usability_domain": [ + "The Rat Proteoform Citations (UniCarbKB) contains rat [taxid:10116] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 505 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000662/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref NCBI Gene dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. 
A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 505 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000251/v-2.1.1", + "usability_domain": [ + "The Rat Proteoform Citations (UniCarbKB) contains rat [taxid:10116] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " + ], + "score": { + "usability_domain_length": 505 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000662/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref NCBI Gene dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." + ], + "score": { + "usability_domain_length": 505 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000001/1.1", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). 
This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000001/1.2", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000001/1.3", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000001/1.4", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). 
This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000557/v-2.0.2", + "usability_domain": [ + "Glycan dictionary is a list of terms which describe some structural aspect of the glycan composition or structure. The terms are extracted with a combination of manual and automatic literature mining tools from various sources. It is an on-going process and the current list includes 180 terms..The terms can also be viewed on the GlyGen wikipedia:https://wiki.glygen.org/index.php/Glycan_structure_dictionary. To submit more terms please follow instructions provided on the GlyGen wikipedia page. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000557/v-2.1.1", + "usability_domain": [ + "Glycan dictionary is a list of terms which describe some structural aspect of the glycan composition or structure. The terms are extracted with a combination of manual and automatic literature mining tools from various sources. It is an on-going process and the current list includes 180 terms..The terms can also be viewed on the GlyGen wikipedia:https://wiki.glygen.org/index.php/Glycan_structure_dictionary. To submit more terms please follow instructions provided on the GlyGen wikipedia page. " + ], + "score": { + "usability_domain_length": 498 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000426/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov2 [taxid:2697049] proteins. 
It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 495 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000081/1.0.36", + "usability_domain": [ + "A list of identified, differentially expressed human (taxid:9606) predictor protein biomarkers in liver cirrhosis (DOID:5082) and hepatocellular carcinoma (DOID:684). Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen and disease types; the biomarker entity, its measurement modality (e.g., increased expression) and type; and cross reference (programmatic linkage or resource ID) to data resources, models, and codes. [PMID:x]" + ], + "score": { + "usability_domain_length": 495 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000426/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov2 [taxid:2697049] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 495 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000029/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus (strain Musoke-80) reference protein coding sequences", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000180448; strain Kenya/Musoke/1980). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 494 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000029/1.1", + "usability_domain": [ + "Lake Victoria marburgvirus (strain Musoke-80) reference protein coding sequences", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000180448; strain Kenya/Musoke/1980). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." + ], + "score": { + "usability_domain_length": 494 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000488/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Diseases dataset contains disease caused by hcv1b virus [taxid:11116] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. 
If you use this dataset please give proper attribution to GlyGen." + ], + "score": { + "usability_domain_length": 494 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000488/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Diseases dataset contains disease caused by hcv1b virus [taxid:11116] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." + ], + "score": { + "usability_domain_length": 494 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000381/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 492 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000381/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 492 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000629/v-2.0.2", + "usability_domain": [ + "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in the UniCarbKB SARS-CoV1 Glycosylation Sites dataset (https://data.glygen.org/GLY_000628). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:26940363, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining publication information for UniCarbKB glycosylation data." + ], + "score": { + "usability_domain_length": 490 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000629/v-2.1.1", + "usability_domain": [ + "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in the UniCarbKB SARS-CoV1 Glycosylation Sites dataset (https://data.glygen.org/GLY_000628). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:26940363, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining publication information for UniCarbKB glycosylation data." + ], + "score": { + "usability_domain_length": 490 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000055/1.0", + "usability_domain": [ + "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. ", + "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. 
Multiple databases are searched to gather information relating to amino acid changes that confer drug resistance within a genome. The information is gathered at the amino acid level and each entry is cross-referenced and verified through publication searches." + ], + "score": { + "usability_domain_length": 489 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000055/1.1", + "usability_domain": [ + "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. ", + "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. Multiple databases are searched to gather information relating to amino acid changes that confer drug resistance within a genome. The information is gathered at the amino acid level and each entry is cross-referenced and verified through publication searches." + ], + "score": { + "usability_domain_length": 489 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000487/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Diseases dataset contains disease caused by hcv1a virus [taxid:11108] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." + ], + "score": { + "usability_domain_length": 487 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000487/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Diseases dataset contains disease caused by hcv1a virus [taxid:11108] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. 
The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." + ], + "score": { + "usability_domain_length": 487 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000380/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 485 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000380/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 485 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000425/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov1 [taxid:694009] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. 
(for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 481 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000425/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov1 [taxid:694009] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 481 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000373/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref PDB contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." + ], + "score": { + "usability_domain_length": 479 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000373/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref PDB contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." + ], + "score": { + "usability_domain_length": 479 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000101/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000031/0.1", + "usability_domain": [ + "Lake Victoria marburgvirus reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000031/1.1", + "usability_domain": [ + "Lake Victoria marburgvirus reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). 
The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000031/1.2", + "usability_domain": [ + "Lake Victoria marburgvirus reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000375/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." 
+ ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000279/v-2.0.2", + "usability_domain": [ + "The Rat Glycosyltransferases dataset contains list of rat [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. If you use this dataset please provide proper attribution to GlyGen" + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000294/0.1", + "usability_domain": [ + "This pipeline represents a workflow that identifies SNPs, deletions, and insertions that correspond to reduced antiviral drug efficacy in Hepatitis C virus subtype 1/a. This is first done through an alignment of the viral reads to a reference genome. When the small read sequences are mapped, the identification of SNPs results in a variant map. This resulting SNP map can be contrasted between individuals with or without drug resistance to associate probable causal mutations." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000279/v-2.1.1", + "usability_domain": [ + "The Rat Glycosyltransferases dataset contains list of rat [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. 
If you use this dataset please provide proper attribution to GlyGen" + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000375/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 478 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000110/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref OMIM contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMIM database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship." + ], + "score": { + "usability_domain_length": 477 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000452/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref PDB contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." 
+ ], + "score": { + "usability_domain_length": 477 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000077/1.0.36", + "usability_domain": [ + " A0131 is a biomarker panel of 20 biomarker entities that enable prognosis of COVID-19 (DOID:0080600) disease course by measuring the change in distribution of 20 urinary peptides. The biomarker panel named COVID20, is composed of 20 endogenous peptides mainly derived from various collagen chains that enable differentiating moderate or severe disease from critical state or death with 83% sensitivity at 100% specificity. Biomarker data retrieved from PubMed [PMID: 32960510]" + ], + "score": { + "usability_domain_length": 477 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000110/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref OMIM contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMIM database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship." + ], + "score": { + "usability_domain_length": 477 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000452/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref PDB contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." 
+ ], + "score": { + "usability_domain_length": 477 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000658/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniprotKB Xref BRENDA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 476 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000454/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref Pfam contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 476 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000454/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref Pfam contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." 
+ ], + "score": { + "usability_domain_length": 476 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000658/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniprotKB Xref BRENDA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." + ], + "score": { + "usability_domain_length": 476 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000031/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448; Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 475 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000116/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref OrthoDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." 
+ ], + "score": { + "usability_domain_length": 473 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000116/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref OrthoDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." + ], + "score": { + "usability_domain_length": 473 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000372/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref PDB contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." + ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000344/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Proteome Materlist dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020_06 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. Please note GlyGen has removed F protein (P0C045 (F_HCV77)) from its masterlist of accessions." 
+ ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000115/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref OrthoDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." + ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000115/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref OrthoDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." + ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000344/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Proteome Materlist dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020_06 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. Please note GlyGen has removed F protein (P0C045 (F_HCV77)) from its masterlist of accessions." 
+ ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000372/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref PDB contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." + ], + "score": { + "usability_domain_length": 472 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000257/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref OrthoDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology. " + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000374/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." 
+ ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000657/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Bgee dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000480/v-2.0.2", + "usability_domain": [ + "Human Glycosylation Sites [GPTwiki], provided by the Clinical and Translational Glycoscience Research Center (CTGRC), Georgetown University. The database contains list of human [taxid:9606] proteins with information on glycosylation sites and associated glycans from GPTwiki database [https://edwardslab.bmcb.georgetown.edu/gptwiki/Main_Page]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000103/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref MGI contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to MGI database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "MGI is the international database resource for the laboratory mouse, providing integrated genetic, genomic, and biological data to facilitate the study of mouse health and disease." 
+ ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000103/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref MGI contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to MGI database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "MGI is the international database resource for the laboratory mouse, providing integrated genetic, genomic, and biological data to facilitate the study of mouse health and disease." + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000257/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref OrthoDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology. " + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000374/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." 
+ ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000480/v-2.1.1", + "usability_domain": [ + "Human Glycosylation Sites [GPTwiki], provided by the Clinical and Translational Glycoscience Research Center (CTGRC), Georgetown University. The database contains list of human [taxid:9606] proteins with information on glycosylation sites and associated glycans from GPTwiki database [https://edwardslab.bmcb.georgetown.edu/gptwiki/Main_Page]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000657/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Bgee dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." + ], + "score": { + "usability_domain_length": 471 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_022118/1.0", + "usability_domain": [ + "BCO Object for interrogating Telomerase RNA Component (TERC) - chromatin interaction. Three libraries were assembled: HiChIRP TERC sample from human female B lymphoblastoid cell line (GM12878) as well as RNase (GM12878) and HeLa cell line TERC-knockout samples as negative controls. Libraries were generated by performing HiChIRP (https://doi.org/10.1038/s41592-019-0407-x) and Illumina paired-end sequencing on the samples. Pipeline steps are repeated for each sample." 
+ ], + "score": { + "usability_domain_length": 469 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000098/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref CDD contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." + ], + "score": { + "usability_domain_length": 469 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000097/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref CDD contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." + ], + "score": { + "usability_domain_length": 469 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000097/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref CDD contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." 
+ ], + "score": { + "usability_domain_length": 469 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000098/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref CDD contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." + ], + "score": { + "usability_domain_length": 469 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000234/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref CDD contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins. " + ], + "score": { + "usability_domain_length": 468 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000234/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref CDD contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins. 
" + ], + "score": { + "usability_domain_length": 468 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_015568/1.0", + "usability_domain": [ + "Pipeline for identifying malarial cell types in images using region-based convolutional neural networks. Deep learning object detection presents another method of identifying cells in different stages of P. vivax development based on training from 1364 annotated images. The outputs of the pipeline are the model weights and validation results. This pipeline was created based on the work of Hung, J., Goodman, A., Ravel, D. et al. doi: 10.1186/s12859-020-03635-x" + ], + "score": { + "usability_domain_length": 466 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000659/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniprotKB Xref CAZy contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." + ], + "score": { + "usability_domain_length": 465 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000414/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Binary Interaction (IntAct) dataset contains sarscov1 [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 465 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000414/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Binary Interaction (IntAct) dataset contains sarscov1 [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 465 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000659/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniprotKB Xref CAZy contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." + ], + "score": { + "usability_domain_length": 465 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000664/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref InterPro contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." 
+ ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000451/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref PDB contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." + ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000668/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref PANTHER contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." + ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000451/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref PDB contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." 
+ ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000664/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref InterPro contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." + ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000668/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref PANTHER contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." + ], + "score": { + "usability_domain_length": 463 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000005/1.0", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000005/1.1", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000005/1.2", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000005/1.3", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000453/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref Pfam contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000453/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref Pfam contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." + ], + "score": { + "usability_domain_length": 462 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000286/v-2.0.2", + "usability_domain": [ + "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000621/v-2.0.2", + "usability_domain": [ + "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000286/v-2.1.1", + "usability_domain": [ + "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000621/v-2.1.1", + "usability_domain": [ + "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 460 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000129/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref UniCarbKB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " + ], + "score": { + "usability_domain_length": 459 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000377/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." + ], + "score": { + "usability_domain_length": 459 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000129/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref UniCarbKB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. 
" + ], + "score": { + "usability_domain_length": 459 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000377/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." + ], + "score": { + "usability_domain_length": 459 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000130/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref UniCarbKB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 458 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000130/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref UniCarbKB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 458 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000289/v-2.1.1", + "usability_domain": [ + "The dataset provides the sequences in InChI format and InChI key for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping dataset (https://data.glygen.org/GLY_000305). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). InChI sequences and keys are retrieved from the PubChem ftp site (ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/CURRENT-Full/SDF/)." + ], + "score": { + "usability_domain_length": 458 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000109/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref KEGG PATHWAY contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." + ], + "score": { + "usability_domain_length": 457 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000109/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref KEGG PATHWAY contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." 
+ ], + "score": { + "usability_domain_length": 457 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000259/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref UniCarbKB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000108/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref KEGG PATHWAY contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000250/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref KEGG PATHWAY contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks. 
" + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000108/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref KEGG PATHWAY contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000250/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref KEGG PATHWAY contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks. " + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000259/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref UniCarbKB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. 
" + ], + "score": { + "usability_domain_length": 456 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000022/1.2", + "usability_domain": [ + "QC of short read sequences re-assembled into genomes.", + "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." + ], + "score": { + "usability_domain_length": 455 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000269/v-2.0.2", + "usability_domain": [ + "The Mouse N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for mouse [taxid:10090] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 455 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000269/v-2.1.1", + "usability_domain": [ + "The Mouse N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for mouse [taxid:10090] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." 
+ ], + "score": { + "usability_domain_length": 455 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000401/v-2.0.2", + "usability_domain": [ + "The Human Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for human [taxid:9606] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000376/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000391/v-2.0.2", + "usability_domain": [ + "The Mouse Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for mouse [taxid:10090] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000376/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000391/v-2.1.1", + "usability_domain": [ + "The Mouse Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for mouse [taxid:10090] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000401/v-2.1.1", + "usability_domain": [ + "The Human Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for human [taxid:9606] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 452 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000272/v-2.0.2", + "usability_domain": [ + "The Rat N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for rat [taxid:10116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 451 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000112/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref neXtProt contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to neXtProt database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "neXtProt is a comprehensive human-centric discovery platform, offering its users a seamless integration of and navigation through protein-related data.", + "" + ], + "score": { + "usability_domain_length": 451 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000112/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref neXtProt contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to neXtProt database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "neXtProt is a comprehensive human-centric discovery platform, offering its users a seamless integration of and navigation through protein-related data.", + "" + ], + "score": { + "usability_domain_length": 451 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000272/v-2.1.1", + "usability_domain": [ + "The Rat N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for rat [taxid:10116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 451 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000390/v-2.0.2", + "usability_domain": [ + "The Rat Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for rat [taxid:10116] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 448 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000390/v-2.1.1", + "usability_domain": [ + "The Rat Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for rat [taxid:10116] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 448 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000002/1.1", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 446 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000002/1.2", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 446 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000002/1.3", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 446 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000002/1.4", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 446 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000006/1.0", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 445 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000006/1.1", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 445 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000006/1.2", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 445 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000675/v-2.0.2", + "usability_domain": [ + "The Fruitfly Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file httpss://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (httpss://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 445 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000675/v-2.1.1", + "usability_domain": [ + "The Fruitfly Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file httpss://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (httpss://glyconnect.expasy.org/)." 
+ ], + "score": { + "usability_domain_length": 445 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000535/v-2.0.2", + "usability_domain": [ + "The SARS-CoV2 Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000530. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 444 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000535/v-2.1.1", + "usability_domain": [ + "The SARS-CoV2 Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000530. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 444 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000056/1.0", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.\n\nThis dataset can be used to get list of canonical and isoform proteins with status and the gene names." 
+ ], + "score": { + "usability_domain_length": 443 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000057/1.0", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.\n\nThis dataset can be used to get list of canonical and isoform proteins with status and the gene names." + ], + "score": { + "usability_domain_length": 443 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_013139/1.0", + "usability_domain": [ + "N-glycosylation has been studied in the development and progression of BC. N-glycan abundances were measured using UPLC from interstitial samples and matched serum. TIF, NIF, and serum samples were collected from about 90 women diagnosed with breast cancer and a total of 165 N-glycan groups were identified. TIF-serum N-glycan abundance correlation was analyzed using CAMPP. GP1, GP37, and GP38 were found to have significant correlation." + ], + "score": { + "usability_domain_length": 440 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000483/v-2.0.2", + "usability_domain": [ + "The Human Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." 
+ ], + "score": { + "usability_domain_length": 440 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000484/v-2.0.2", + "usability_domain": [ + "The Mouse Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 440 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000483/v-2.1.1", + "usability_domain": [ + "The Human Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 440 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000484/v-2.1.1", + "usability_domain": [ + "The Mouse Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." 
+ ], + "score": { + "usability_domain_length": 440 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000034/1.0", + "usability_domain": [ + "Salmonella typhimurium reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000032/1.1", + "usability_domain": [ + "Salmonella typhimurium reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000034/1.1", + "usability_domain": [ + "Salmonella typhimurium reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000485/v-2.0.2", + "usability_domain": [ + "The Rat Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000408/v-2.0.2", + "usability_domain": [ + "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as an animated GIF through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000124/v-2.0.2", + "usability_domain": [ + "The Mouse UniprotKB Xref PRO contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. 
" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000313/v-2.0.2", + "usability_domain": [ + "The Human Protein Binary Interaction (IntAct) dataset contains human [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000120/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref PDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000314/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Binary Interaction (IntAct) dataset contains mouse [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000120/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref PDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000124/v-2.1.1", + "usability_domain": [ + "The Mouse UniprotKB Xref PRO contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + " PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000313/v-2.1.1", + "usability_domain": [ + "The Human Protein Binary Interaction (IntAct) dataset contains human [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000314/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Binary Interaction (IntAct) dataset contains mouse [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000485/v-2.1.1", + "usability_domain": [ + "The Rat Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000408/v-2.1.1", + "usability_domain": [ + "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as an animated GIF through this GlyGen data object. 
For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" + ], + "score": { + "usability_domain_length": 438 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000122/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref Pfam contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 437 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000119/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref PDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " + ], + "score": { + "usability_domain_length": 437 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000119/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref PDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. 
" + ], + "score": { + "usability_domain_length": 437 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000122/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref Pfam contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 437 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000013/1.1", + "usability_domain": [ + "HIV1 (HXB2) reference protein accessions and summary annotations. ", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000013/1.2", + "usability_domain": [ + "HIV1 (HXB2) reference protein accessions and summary annotations. ", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000013/1.3", + "usability_domain": [ + "HIV1 (HXB2) reference protein accessions and summary annotations. ", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000121/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref Pfam contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000136/v-2.0.2", + "usability_domain": [ + "The Mouse Transcript Locus (Ensembl Transcript coordinates) contains UniProtKB mouse [taxid:10090] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000121/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref Pfam contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000136/v-2.1.1", + "usability_domain": [ + "The Mouse Transcript Locus (Ensembl Transcript coordinates) contains UniProtKB mouse [taxid:10090] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 436 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000409/v-2.0.2", + "usability_domain": [ + "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as a .mp4 video through this GlyGen data object. 
For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000135/v-2.0.2", + "usability_domain": [ + "The Human Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB human [taxid:9606] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000123/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref PRO contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000245/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref PDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. 
" + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000123/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref PRO contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000135/v-2.1.1", + "usability_domain": [ + "The Human Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB human [taxid:9606] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000245/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref PDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. 
" + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000409/v-2.1.1", + "usability_domain": [ + "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as a .mp4 video through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" + ], + "score": { + "usability_domain_length": 435 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000446/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref ChEMBL contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." + ], + "score": { + "usability_domain_length": 434 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000315/v-2.0.2", + "usability_domain": [ + "The Rat Protein Binary Interaction (IntAct) dataset contains rat [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 434 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000315/v-2.1.1", + "usability_domain": [ + "The Rat Protein Binary Interaction (IntAct) dataset contains rat [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 434 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000446/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref ChEMBL contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." + ], + "score": { + "usability_domain_length": 434 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000262/v-2.0.2", + "usability_domain": [ + "The Rat UniprotKB Xref PRO contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. 
" + ], + "score": { + "usability_domain_length": 432 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000262/v-2.1.1", + "usability_domain": [ + "The Rat UniprotKB Xref PRO contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " + ], + "score": { + "usability_domain_length": 432 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000248/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref Pfam contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000273/v-2.0.2", + "usability_domain": [ + "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to Rat Genome Database (RGD) accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
f you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + "The Rat Genome Database (RGD) is the premier site for genetic, genomic, phenotype, and disease data generated from rat research" + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000237/v-2.0.2", + "usability_domain": [ + "The Rat Ensembl Transcript Locus (Transcript coordinates)s contains UniProtKB rat [taxid:10116] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000049/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for non-small cell lung cancer - This file contains FDA-approved human biomarker tests for non-small cell lung cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000237/v-2.1.1", + "usability_domain": [ + "The Rat Ensembl Transcript Locus (Transcript coordinates)s contains UniProtKB rat [taxid:10116] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000248/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref Pfam contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000273/v-2.1.1", + "usability_domain": [ + "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to Rat Genome Database (RGD) accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. f you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", + "The Rat Genome Database (RGD) is the premier site for genetic, genomic, phenotype, and disease data generated from rat research" + ], + "score": { + "usability_domain_length": 431 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000403/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1b [taxid:11116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." 
+ ], + "score": { + "usability_domain_length": 430 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000027/1.0.25", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for non-small cell lung cancer - This file contains FDA-approved human biomarker tests for non-small cell lung cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)" + ], + "score": { + "usability_domain_length": 430 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000403/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1b [taxid:11116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 430 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000082/v-2.0.2", + "usability_domain": [ + "The Mouse Gene Locus dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the corresponding ensembl gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 429 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000082/v-2.1.1", + "usability_domain": [ + "The Mouse Gene Locus dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the corresponding ensembl gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 429 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000428/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov2 [taxid:2697049] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." + ], + "score": { + "usability_domain_length": 428 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000105/1.0.36", + "usability_domain": [ + "A0409 is the Invader UGT1A1 Molecular Assay is an in vitro diagnostic test for the detection and genotyping of the *1 (TA6) and *28 (TA7) alleles of the UDP glucuronosyltransferase 1A1 (UGT1A1) gene in genomic DNA from whole peripheral blood as an aid in the identification of patients with greater risk for decreased UDP-glucuronosyltransferase activity. [FTCID:K051824]. This panel is curated from the FDA Approved biomarkers." 
+ ], + "score": { + "usability_domain_length": 428 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000428/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov2 [taxid:2697049] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." + ], + "score": { + "usability_domain_length": 428 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000003/1.0", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 427 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000003/1.1", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." 
+ ], + "score": { + "usability_domain_length": 427 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000003/1.2", + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure, and more" + ], + "score": { + "usability_domain_length": 427 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000267/v-2.0.2", + "usability_domain": [ + "The Rat Gene Locus (Ensembl) contains rat [taxid:10116] UniProtKB canonical accessions mapped to the corresponding ENSEMBL Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 427 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000267/v-2.1.1", + "usability_domain": [ + "The Rat Gene Locus (Ensembl) contains rat [taxid:10116] UniProtKB canonical accessions mapped to the corresponding ENSEMBL Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 427 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000613/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2) Protein Names (NCBI RefSeq) dataset contains sarscov2 [taxid:2697049] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 425 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000613/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2) Protein Names (NCBI RefSeq) dataset contains sarscov2 [taxid:2697049] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 425 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000667/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref OrthoDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." 
+ ], + "score": { + "usability_domain_length": 424 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000667/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref OrthoDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." + ], + "score": { + "usability_domain_length": 424 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000402/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1a [taxid:11108] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 423 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000402/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1a [taxid:11108] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." 
+ ], + "score": { + "usability_domain_length": 423 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000489/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Diseases dataset contains disease caused by sarscov1 virus [taxid:694009] which is SARS and is mapped to all UniProtKB canonical accessions. The dataset contains SARS (Severe Acute Respiratory Syndrome) disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 422 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000489/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Diseases dataset contains disease caused by sarscov1 virus [taxid:694009] which is SARS and is mapped to all UniProtKB canonical accessions. The dataset contains SARS (Severe Acute Respiratory Syndrome) disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 422 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000660/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref CDD contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." 
+ ], + "score": { + "usability_domain_length": 421 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000660/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref CDD contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." + ], + "score": { + "usability_domain_length": 421 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000737/v-2.1.1", + "usability_domain": [ + "The Glycan Biomarkers dataset contains glycan biomarker information, including cross-mapped GlyTouCan accessions, assessed biomarker entity, biomarker type, specimen type, LOINC code, disease name, and source evidence from the biomarker portal (https://data.oncomx.org/allbiomarkers). If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite: PMID:32142370, PMID:34015823, and PMID:31616925." + ], + "score": { + "usability_domain_length": 421 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_016981/1.0", + "usability_domain": [ + "Template workflow for the detection of adventitious virus, workflow was built and tested with a titration of synthetic reads and applied to a real dataset. The synthetic reads were generated from a set of 25 viruses sequences that represent the 25 families in the viral zone database and 6 host organisms as background sequences. The pipeline can be used to detect adventitious viral agents in biological manufacturing." 
+ ], + "score": { + "usability_domain_length": 419 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000057/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] nonsynonymous single-nucleotide variations (nsSNVs) with data in cancer samples from TCGA, ICGC, COSMIC, ClinVar, and CIViC - This file contains human [taxid:9606] nonsynonymous single-nucleotide variations (nsSNV) in cancer samples. Variants are mapped to canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." + ], + "score": { + "usability_domain_length": 419 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000007/v-2.0.2", + "usability_domain": [ + "The Mouse Proteome Masterlist dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 414 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000427/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov1 [taxid:694009] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." 
+ ], + "score": { + "usability_domain_length": 414 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000007/v-2.1.1", + "usability_domain": [ + "The Mouse Proteome Masterlist dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 414 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000427/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov1 [taxid:694009] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." + ], + "score": { + "usability_domain_length": 414 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000043/v-2.0.2", + "usability_domain": [ + "The Mouse Glycosylation Sites (RCSB PDB) dataset contains mouse [taxid:10090] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." 
+ ], + "score": { + "usability_domain_length": 413 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000026/1.0.25", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for colorectal cancer - This file contains FDA-approved human biomarker tests for colorectal cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 413 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000047/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for colorectal cancer - This file contains FDA-approved human biomarker tests for colorectal cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 413 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000043/v-2.1.1", + "usability_domain": [ + "The Mouse Glycosylation Sites (RCSB PDB) dataset contains mouse [taxid:10090] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." 
+ ], + "score": { + "usability_domain_length": 413 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_022807/1.0", + "usability_domain": [ + "A subtyping model for SARS-CoV-2 based on Informative Subtype Markers (ISMs) defined as variable regions between viral genomes that serve as characteristic regions in the genome. This pipeline creates a multiple sequence alignment and together with the metadata allows for the viral profiling of different subtypes across different geographic locations visualized as pie charts/time series plots and ISM tables. " + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000042/v-2.0.2", + "usability_domain": [ + "The Human Glycosylation Sites (RCSB PDB) dataset contains human [taxid:9606] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.0", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." 
+ ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.1", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.2", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.3", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." 
+ ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000042/v-2.1.1", + "usability_domain": [ + "The Human Glycosylation Sites (RCSB PDB) dataset contains human [taxid:9606] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.4", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." + ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000001/23.5", + "usability_domain": [ + "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", + "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." 
+ ], + "score": { + "usability_domain_length": 412 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000013/27.0", + "usability_domain": [ + "Multiple reference guided assembly with BWA, VAPOR and iVar.", + "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." + ], + "score": { + "usability_domain_length": 411 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000013/27.1", + "usability_domain": [ + "Multiple reference guided assembly with BWA, VAPOR and iVar.", + "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." + ], + "score": { + "usability_domain_length": 411 + } + }, + { + "object_id": "https://biocomputeobject.org/ARG_000013/27.2", + "usability_domain": [ + "Multiple reference guided assembly with BWA, VAPOR and iVar.", + "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." 
+ ], + "score": { + "usability_domain_length": 411 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000367/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) (HCV) Protein Site Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000226/v-2.0.2", + "usability_domain": [ + "The Rat Glycosylation Sites (RCSB PDB) dataset contains rat [taxid:11106] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000366/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000030/1.0.25", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for melanoma cancer - This file contains FDA-approved human biomarker tests for melanoma cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000048/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for melanoma cancer - This file contains FDA-approved human biomarker tests for melanoma cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000051/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for prostate cancer - This file contains FDA-approved human biomarker tests for prostate cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." 
+ ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000226/v-2.1.1", + "usability_domain": [ + "The Rat Glycosylation Sites (RCSB PDB) dataset contains rat [taxid:11106] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000366/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000367/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) (HCV) Protein Site Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 409 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000665/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref KEGG PATHWAY contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." + ], + "score": { + "usability_domain_length": 408 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000029/1.0.25", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for prostate cancer - This file contains FDA-approved human biomarker tests for prostate cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)" + ], + "score": { + "usability_domain_length": 408 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000665/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref KEGG PATHWAY contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." 
+ ], + "score": { + "usability_domain_length": 408 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000351/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Sequence Info (UniProtKB) dataset contains hcv1b [taxid:11108] UniProtKB protein sequence information that includes sequence version fasta header for the hcv1b accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000114/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref OMA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000229/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref OMA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes. 
" + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000420/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000028/1.0.25", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for ovarian cancer - This file contains FDA-approved human biomarker tests for ovarian cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000050/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for ovarian cancer - This file contains FDA-approved human biomarker tests for ovarian cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." 
+ ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000114/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref OMA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000229/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref OMA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes. " + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000351/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Sequence Info (UniProtKB) dataset contains hcv1b [taxid:11108] UniProtKB protein sequence information that includes sequence version fasta header for the hcv1b accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000420/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 407 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.1", + "usability_domain": [ + "List of SRA IDs and associated data that are in data.argosdb.", + "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 406 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000113/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref OMA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." 
+ ], + "score": { + "usability_domain_length": 406 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000113/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref OMA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." + ], + "score": { + "usability_domain_length": 406 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000100/v-2.0.2", + "usability_domain": [ + "The Mouse UniprotKB Xref ChEMBL contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." + ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000099/v-2.0.2", + "usability_domain": [ + "The Human UniprotKB Xref ChEMBL contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." 
+ ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000289/v-2.0.2", + "usability_domain": [ + "The dataset provides the InChI key and glycan sequences in InChI format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping (https://data.glygen.org/GLYDS000281). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000144). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000042/1.0.36", + "usability_domain": [ + "FDA-approved or cleared nucleic acid-based human biomarker tests for breast cancer - This file contains FDA-approved human biomarker tests for breast cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." + ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000099/v-2.1.1", + "usability_domain": [ + "The Human UniprotKB Xref ChEMBL contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." 
+ ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000100/v-2.1.1", + "usability_domain": [ + "The Mouse UniprotKB Xref ChEMBL contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." + ], + "score": { + "usability_domain_length": 405 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000464/v-2.0.2", + "usability_domain": [ + "The GlyGen-Pharos Cross-references Mapping dataset contains GlyGen's human [taxid:9606] UniProtKB accessions and corresponding GlyGen URLs. The dataset is derived from 2019-09 UniProtKB release.", + "The dataset is created for Pharos database to crosslink their protein page links to GlyGen protein pages.", + "The log file contains the Pharos protein accessions that could not be mapped to GlyGen protein accessions" + ], + "score": { + "usability_domain_length": 404 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000227/v-2.0.2", + "usability_domain": [ + "The Rat UniprotKB Xref ChEMBL contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties. 
" + ], + "score": { + "usability_domain_length": 404 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000227/v-2.1.1", + "usability_domain": [ + "The Rat UniprotKB Xref ChEMBL contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties. " + ], + "score": { + "usability_domain_length": 404 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000464/v-2.1.1", + "usability_domain": [ + "The GlyGen-Pharos Cross-references Mapping dataset contains GlyGen's human [taxid:9606] UniProtKB accessions and corresponding GlyGen URLs. The dataset is derived from 2019-09 UniProtKB release.", + "The dataset is created for Pharos database to crosslink their protein page links to GlyGen protein pages.", + "The log file contains the Pharos protein accessions that could not be mapped to GlyGen protein accessions" + ], + "score": { + "usability_domain_length": 404 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000008/1.5", + "usability_domain": [ + "Assembly metadata extracted from NCBI from the Original FDA-ARGOS BioProject. ", + "This table includes metadata from the FDA-ARGOS BioProject (PRJNA231221) with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the project and have been deposited into NCBI BioProject and SRA." 
+ ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000364/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein PTM Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000472/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein PTM Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000475/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Phosphorylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000342/v-2.0.2", + "usability_domain": [ + "The dataset provides glycans that have at least one \"NeuAc\" residue in the current GlyGen dataset (https://data.glygen.org/GLYDS000281). 
The GlyTouCan accessions included in this list have mammalian species annotation (human/mouse/rat) through direct TaxID annotation and/or inferred via subsumption. This table was customized based on a GlyGen user query submitted through GlyGen help on Feb 17th 2020." + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000342/v-2.1.1", + "usability_domain": [ + "The dataset provides glycans that have at least one \"NeuAc\" residue in the current GlyGen dataset (https://data.glygen.org/GLYDS000281). The GlyTouCan accessions included in this list have mammalian species annotation (human/mouse/rat) through direct TaxID annotation and/or inferred via subsumption. This table was customized based on a GlyGen user query submitted through GlyGen help on Feb 17th 2020." + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000364/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein PTM Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000472/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein PTM Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000475/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Phosphorylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 403 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000022/1.0", + "usability_domain": [ + "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." + ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000022/1.1", + "usability_domain": [ + "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." 
+ ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000385/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Phosphorylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000365/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000365/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000385/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Phosphorylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 402 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000479/v-2.0.2", + "usability_domain": [ + "List of SARS coronavirus (SARS-CoV-2 or 2019-nCoV) [taxid:2697049] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000434)" + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000444/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Site Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000490/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Diseases dataset contains disease caused by sarscov2 virus [taxid:2697049] which is COVID-19 mapped to all UniProtKB canonical accessions. The dataset contains COVID-19 disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000444/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Site Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000479/v-2.1.1", + "usability_domain": [ + "List of SARS coronavirus (SARS-CoV-2 or 2019-nCoV) [taxid:2697049] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. 
The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000434)" + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000490/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Diseases dataset contains disease caused by sarscov2 virus [taxid:2697049] which is COVID-19 mapped to all UniProtKB canonical accessions. The dataset contains COVID-19 disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 401 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000641/v-2.0.2", + "usability_domain": [ + "The Fruitfly Gene Names (UniProtKB) dataset contains UniProtKB gene Names/names for fruitfly [taxid:7227] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 400 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000641/v-2.1.1", + "usability_domain": [ + "The Fruitfly Gene Names (UniProtKB) dataset contains UniProtKB gene Names/names for fruitfly [taxid:7227] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 400 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000494/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Site Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 398 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.4", + "usability_domain": [ + "ngs ID List, Selection Criteria and Key Table ", + "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 398 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000494/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Site Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 398 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000735/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref O-GlcNAcAtlas dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAcAtlas database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAcAtlas and GlyGen. O-GlcNAcAtlas is a a rigorously curated database for experimentally identified O-GlcNAc sites/proteins (https://oglcnac.org/atlas/)" + ], + "score": { + "usability_domain_length": 398 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.5", + "usability_domain": [ + "ngs ID List, Selection Criteria and Key Table ", + "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 398 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.6", + "usability_domain": [ + "ngs ID List, Selection Criteria and Key Table", + "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." 
+ ], + "score": { + "usability_domain_length": 397 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000393/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Names (NCBI RefSeq) dataset contains mouse [taxid:10090] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 396 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000363/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein PTM Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 396 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.3", + "usability_domain": [ + "ngs ID List, Selection Criteria and Master Table ", + "This dataset was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 396 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000363/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein PTM Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] PTM annotations for UniProtKB canonical accessions. 
The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 396 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000393/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Names (NCBI RefSeq) dataset contains mouse [taxid:10090] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 396 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000392/v-2.0.2", + "usability_domain": [ + "The Human Protein Names (NCBI RefSeq) dataset contains human [taxid:9606] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 395 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000384/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Phosphorylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 395 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000384/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Phosphorylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 395 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000392/v-2.1.1", + "usability_domain": [ + "The Human Protein Names (NCBI RefSeq) dataset contains human [taxid:9606] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 395 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000125/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref Reactome contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. 
" + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000126/v-2.0.2", + "usability_domain": [ + "TheMouse UniProtKB Xref Reactome contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database." + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000260/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref Reactome contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.2", + "usability_domain": [ + "ngs ID List, Selection Criteria and Master Table ", + "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000125/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref Reactome contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. 
The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000126/v-2.1.1", + "usability_domain": [ + "TheMouse UniProtKB Xref Reactome contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database." + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000260/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref Reactome contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 394 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000419/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 393 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000419/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 393 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000018/1.0", + "usability_domain": [ + "List of SRA ids that are in beta-data.argosdb.", + "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." + ], + "score": { + "usability_domain_length": 391 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000736/v-2.1.1", + "usability_domain": [ + "The GlyGen-iPTMnet Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], fruitfly [taxid:7227], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of mapping UniProtKB glycosylation site info to PMID and GlyGen glycosylation site view URLs." 
+ ], + "score": { + "usability_domain_length": 391 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000474/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Phosphorylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000615/v-2.0.2", + "usability_domain": [ + "The The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) NCBI Protein Linkouts contains sarscov2 [taxid:2697049] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000691/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Binary Interaction (IntAct) dataset contains fruitfly [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000669/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref PDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000471/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein PTM Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000471/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein PTM Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000474/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Phosphorylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000615/v-2.1.1", + "usability_domain": [ + "The The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) NCBI Protein Linkouts contains sarscov2 [taxid:2697049] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000669/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref PDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000691/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Binary Interaction (IntAct) dataset contains fruitfly [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 389 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000670/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Pfam contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 388 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000670/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Pfam contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " + ], + "score": { + "usability_domain_length": 388 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000465/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Site Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 387 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000671/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniprotKB Xref PRO contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. 
" + ], + "score": { + "usability_domain_length": 387 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000465/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Site Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 387 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000671/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniprotKB Xref PRO contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " + ], + "score": { + "usability_domain_length": 387 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000656/v-2.0.2", + "usability_domain": [ + "The Fruitfly Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB fruitfly [taxid:7227] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 386 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000656/v-2.1.1", + "usability_domain": [ + "The Fruitfly Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB fruitfly [taxid:7227] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 386 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000473/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Glycosylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000383/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Glycosylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000239/v-2.0.2", + "usability_domain": [ + "The Human N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for human [taxid:9606] UniProtKB canonical sequences identified by the script. 
The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000554/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000239/v-2.1.1", + "usability_domain": [ + "The Human N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for human [taxid:9606] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000383/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Glycosylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000473/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Glycosylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000554/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 385 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000434/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Proteome Master list dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." 
+ ], + "score": { + "usability_domain_length": 384 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000434/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Proteome Master list dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." + ], + "score": { + "usability_domain_length": 384 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000345/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Proteome Materlist dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 383 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000032/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] differentially expressed genes or miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed genes in cancer samples. Genes are mapped to RefSeq transcripts and canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." + ], + "score": { + "usability_domain_length": 383 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000022/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] differentially expressed genes or miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed genes in cancer samples. 
Genes are mapped to RefSeq transcripts and canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." + ], + "score": { + "usability_domain_length": 383 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000345/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Proteome Materlist dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 383 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000538/v-2.1.1", + "usability_domain": [ + "The Glycan Citations (NCFG) dataset contains publication information for a library of complex multiantennary Asn-linked N-glycans generated by chemo-enzymatic synthesis, including PMID, journal name, date, author and title. The dataset is derived from the dataset contributed by the Richard Cummings Laboratory (NCFG BIDMC Harvard Medical School) http://data.glygen.org/GLY_000600." + ], + "score": { + "usability_domain_length": 381 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000742/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 381 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000382/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Glycosylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 378 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000235/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Citations (RefSeq) datasets contains mouse [taxid:10090] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 378 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000235/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Citations (RefSeq) datasets contains mouse [taxid:10090] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. 
" + ], + "score": { + "usability_domain_length": 378 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000382/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Glycosylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 378 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_023769/1.0", + "usability_domain": [ + "This curated test computation evaluates the performance of Heptagon, a tool that performs base and SNP-calling for a previously computed alignment and provides quality and noise assessment profiles. ", + "Heptagon was used to identify SNPs from the previous Hexagon alignment of Whole Exome Sequencing of lung squamous carcinoma (SQCC) patients against human reference genome GRCh38." + ], + "score": { + "usability_domain_length": 377 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_025006/1.0", + "usability_domain": [ + "Full-length genome sequence of segmented RNA virus from ticks was obtained using small RNA sequencing data. this is the first study in which 5\u2032 and 3\u2032 sRNAs were used to generate full-length genome sequences of, but not limited to, RNA viruses. The workflow demonstrates the feasibility of using the sRNA-seq based methods for the detection of viruses in pooled/indiviudal RNA " + ], + "score": { + "usability_domain_length": 377 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000256/v-2.0.2", + "usability_domain": [ + "The Human Protein Citations (RefSeq) datasets contains human [taxid:9606] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). 
The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 377 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000108/1.0.36", + "usability_domain": [ + "A0412 is the Vysis ALK Break Apart FISH Probe Kit is a qualitative test to detect rearrangements involving the ALK gene via fluorescence in situ hybridization (FISH) in formalin-fixed paraffin-embedded (FFPE) non- small cell lung cancer (NSCLC) tissue specimens ... The test is for prescription use only. [FTCID:P110012]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 377 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000256/v-2.1.1", + "usability_domain": [ + "The Human Protein Citations (RefSeq) datasets contains human [taxid:9606] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 377 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000319/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Reactions (Reactome) dataset contains reactions information for pathways for mouse [taxid:10090] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 376 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000029/v-2.0.2", + "usability_domain": [ + "Gene Expression (Cancer) dataset contains the gene expression information in cancers from BioXpress database. The GlyGen interface shows the gene expression (cancer) information for select cancer types/DOIDs. For more cancer types/DOIDs please refer - https://hive.biochemistry.gwu.edu/bioxpress. If you use this dataset please give proper attribution to BioXpress and GlyGen." + ], + "score": { + "usability_domain_length": 376 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000029/v-2.1.1", + "usability_domain": [ + "Gene Expression (Cancer) dataset contains the gene expression information in cancers from BioXpress database. The GlyGen interface shows the gene expression (cancer) information for select cancer types/DOIDs. For more cancer types/DOIDs please refer - https://hive.biochemistry.gwu.edu/bioxpress. If you use this dataset please give proper attribution to BioXpress and GlyGen." + ], + "score": { + "usability_domain_length": 376 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000319/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Reactions (Reactome) dataset contains reactions information for pathways for mouse [taxid:10090] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 376 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000310/v-2.0.2", + "usability_domain": [ + "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 375 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000320/v-2.0.2", + "usability_domain": [ + "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 375 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000310/v-2.1.1", + "usability_domain": [ + "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 375 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000320/v-2.1.1", + "usability_domain": [ + "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 375 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000466/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Information dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 373 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000466/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Information dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 373 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000321/v-2.0.2", + "usability_domain": [ + "The Rat Protein Reactions (Reactome) dataset contains reactions information for pathways for rat [taxid:10116] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 372 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000688/v-2.0.2", + "usability_domain": [ + "\"The Fruitfly Protein Reaction Participants (Reactome) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen\"" + ], + "score": { + "usability_domain_length": 372 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000321/v-2.1.1", + "usability_domain": [ + "The Rat Protein Reactions (Reactome) dataset contains reactions information for pathways for rat [taxid:10116] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 372 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000688/v-2.1.1", + "usability_domain": [ + "\"The Fruitfly Protein Reaction Participants (Reactome) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen\"" + ], + "score": { + "usability_domain_length": 372 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000433/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Information dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000495/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Glycosylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000264/v-2.0.2", + "usability_domain": [ + "The Rat Protein Citations (RefSeq) datasets contains rat [taxid:10116] UniProtKB accessions mapped to publication information (pmid, title, journal name, publication, date, authors of corresponding RefSeq accessions. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000556/v-2.0.2", + "usability_domain": [ + "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlycoMotif (https://glycomotif.glyomics.org/glycomotif/GlycoMotif). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281) and the associated PMIDs is derived from file Glycan Motif(https://data.glygen.org/GLY_000283)." + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000638/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Enzyme Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000614/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) NCBI Protein Linkouts contains sarscov1 [taxid:694009] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. 
linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000467/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Proteome Master list dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000264/v-2.1.1", + "usability_domain": [ + "The Rat Protein Citations (RefSeq) datasets contains rat [taxid:10116] UniProtKB accessions mapped to publication information (pmid, title, journal name, publication, date, authors of corresponding RefSeq accessions. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000433/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Information dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000467/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Proteome Master list dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000495/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Glycosylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000614/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) NCBI Protein Linkouts contains sarscov1 [taxid:694009] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000638/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Enzyme Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] enzyme annotations for UniProtKB canonical accessions. 
The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 371 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000435/0.1", + "usability_domain": [ + "For this data set, we pulled biomarker data from OncoMX. From the raw TSV, the data was cleaned and panel biomarkers were filtered out. Then, temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers. " + ], + "score": { + "usability_domain_length": 370 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000012/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Canonical (UniProtKB) Sequences dataset contains mouse [taxid:10090] protein canonical fasta sequences from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 369 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000012/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Canonical (UniProtKB) Sequences dataset contains mouse [taxid:10090] protein canonical fasta sequences from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 369 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000354/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Information dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 366 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000354/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Information dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 366 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000323/v-2.0.2", + "usability_domain": [ + "The Human Protein Enzyme Annotation (UniProtKB) dataset contains human [taxid:9606] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 365 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000324/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Enzyme Annotation (UniProtKB) dataset contains mouse [taxid:10090] enzyme annotations for UniProtKB canonical accessions. 
The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 365 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000323/v-2.1.1", + "usability_domain": [ + "The Human Protein Enzyme Annotation (UniProtKB) dataset contains human [taxid:9606] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 365 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000324/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Enzyme Annotation (UniProtKB) dataset contains mouse [taxid:10090] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 365 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000647/v-2.0.2", + "usability_domain": [ + "The Fruitfly Proteome Masterlist dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 364 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000647/v-2.1.1", + "usability_domain": [ + "The Fruitfly Proteome Masterlist dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 364 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000041/v-2.0.2", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000007)." + ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000341/v-2.0.2", + "usability_domain": [ + "The dataset provides species annotation for the associated glycan (GlyTouCan Accession) generated from direct TaxID annotation and/or inferred via subsumption. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.ncbi.nlm.nih.gov/taxonomy." + ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000268/v-2.0.2", + "usability_domain": [ + "The Human Tyr O-Linked Glycosylation Sites dataset contains human [taxid:9606] UniProtKB accessions that are Tyr O-Linked Glycosylated. 
The script that processes this dataset matches the amino acid and position with the latest release fasta file as a QC check to eliminate the incorrect entries. If you use this dataset please provide proper attribution to GlyGen" + ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000041/v-2.1.1", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000007)." + ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000268/v-2.1.1", + "usability_domain": [ + "The Human Tyr O-Linked Glycosylation Sites dataset contains human [taxid:9606] UniProtKB accessions that are Tyr O-Linked Glycosylated. The script that processes this dataset matches the amino acid and position with the latest release fasta file as a QC check to eliminate the incorrect entries. If you use this dataset please provide proper attribution to GlyGen" + ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000341/v-2.1.1", + "usability_domain": [ + "The dataset provides species annotation for the associated glycan (GlyTouCan Accession) generated from direct TaxID annotation and/or inferred via subsumption. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.ncbi.nlm.nih.gov/taxonomy." 
+ ], + "score": { + "usability_domain_length": 363 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000356/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Citations dataset contains publication information for hcv1b [taxid:11116] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000640/v-2.0.2", + "usability_domain": [ + "The Fruitfly Gene Locus (Ensembl) contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000441/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Sequence Info (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000040/v-2.0.2", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. 
The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)." + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000673/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref O-GlcNAc (MCW) contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAc (MCW) database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAc (MCW) and GlyGen. O - GlcNAc(MCW) is a database for O - GlcNAc glycosylation information. https://www.oglcnac.mcw.edu/" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000291/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in SMILES Isomeric format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000040/v-2.1.1", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)." 
+ ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000291/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in SMILES Isomeric format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000356/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Citations dataset contains publication information for hcv1b [taxid:11116] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000441/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Sequence Info (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000640/v-2.1.1", + "usability_domain": [ + "The Fruitfly Gene Locus (Ensembl) contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000673/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref O-GlcNAc (MCW) contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAc (MCW) database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAc (MCW) and GlyGen. O - GlcNAc(MCW) is a database for O - GlcNAc glycosylation information. https://www.oglcnac.mcw.edu/" + ], + "score": { + "usability_domain_length": 362 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000325/v-2.0.2", + "usability_domain": [ + "The Rat Protein Enzyme Annotation (UniProtKB) dataset contains rat [taxid:10116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000349/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Isoform sequences (UniProtKB) dataset contains hcv1b [taxid:11116] protein fasta sequences for the hcv1b isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000317/v-2.0.2", + "usability_domain": [ + "The Mouse Protein PTM Annotation (UniProtKB) dataset contains mouse [taxid:10090] PTM annotations for UniProtKB canonical accessions. 
The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000254/v-2.0.2", + "usability_domain": [ + "The Mouse Phosphorylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000221/v-2.0.2", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000244)." + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000412/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Isoform sequences (UniProtKB) dataset contains sarscov2 [taxid:2697049] protein fasta sequences for the sarscov2 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000221/v-2.1.1", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000244)." + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000254/v-2.1.1", + "usability_domain": [ + "The Mouse Phosphorylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000317/v-2.1.1", + "usability_domain": [ + "The Mouse Protein PTM Annotation (UniProtKB) dataset contains mouse [taxid:10090] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000325/v-2.1.1", + "usability_domain": [ + "The Rat Protein Enzyme Annotation (UniProtKB) dataset contains rat [taxid:10116] enzyme annotations for UniProtKB canonical accessions. 
The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000349/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Isoform sequences (UniProtKB) dataset contains hcv1b [taxid:11116] protein fasta sequences for the hcv1b isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000412/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Isoform sequences (UniProtKB) dataset contains sarscov2 [taxid:2697049] protein fasta sequences for the sarscov2 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 361 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000470/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Citations dataset contains publication information for sarscov2 [taxid:2697049] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 360 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000353/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) EBI-UniProtKB NT file contains proteome data for hcv1b [taxid:11116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 360 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000087/1.0.36", + "usability_domain": [ + "A0391 is a biomarker panel of identified gene and protein alterations in colorectal cancer (DOID:9256) to screen adults of either sex, 50 years or older, who are at typical average risk for CRC. The genes are KRAS (UPKB:P01116), BMP3 (UPKB:P12645), NDRG4 (UPKB:Q9ULP0), and the hemoglobin (HBB) protein. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 360 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000353/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) EBI-UniProtKB NT file contains proteome data for hcv1b [taxid:11116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 360 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000470/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Citations dataset contains publication information for sarscov2 [taxid:2697049] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 360 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000033/1.2", + "usability_domain": [ + "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference proteome fasta sequences.", + "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 359 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000033/1.0", + "usability_domain": [ + "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference proteome fasta sequences.", + "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 359 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000327/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Site Annotation (UniProtKB) dataset contains mouse [taxid:10090] site annotations for UniProtKB canonical accessions. 
The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 359 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000327/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Site Annotation (UniProtKB) dataset contains mouse [taxid:10090] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 359 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000316/v-2.0.2", + "usability_domain": [ + "The Human Protein PTM Annotation (UniProtKB) dataset contains human [taxid:9606] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000322/v-2.0.2", + "usability_domain": [ + "The Human Protein Site Annotation (UniProtKB) dataset contains human [taxid:9606] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000689/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Reaction Participants (Rhea) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000666/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref OMA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000436/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) EBI-UniProtKB NT file contains proteome data for sarscov2 [taxid:2697049] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000316/v-2.1.1", + "usability_domain": [ + "The Human Protein PTM Annotation (UniProtKB) dataset contains human [taxid:9606] PTM annotations for UniProtKB canonical accessions. 
The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000322/v-2.1.1", + "usability_domain": [ + "The Human Protein Site Annotation (UniProtKB) dataset contains human [taxid:9606] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000436/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) EBI-UniProtKB NT file contains proteome data for sarscov2 [taxid:2697049] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000666/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref OMA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." 
+ ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000689/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Reaction Participants (Rhea) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen" + ], + "score": { + "usability_domain_length": 358 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000432/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Information dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000274/v-2.0.2", + "usability_domain": [ + "The Human Phosphorylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000231/v-2.0.2", + "usability_domain": [ + "The Rat Phosphorylation Sites (UniProtKB) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000305/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://pubchem.ncbi.nlm.nih.gov" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000645/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Information (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000661/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniprotKB Xref ChEMBL contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." 
+ ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000231/v-2.1.1", + "usability_domain": [ + "The Rat Phosphorylation Sites (UniProtKB) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000274/v-2.1.1", + "usability_domain": [ + "The Human Phosphorylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000305/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://pubchem.ncbi.nlm.nih.gov" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000432/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Information dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000645/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Information (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000661/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniprotKB Xref ChEMBL contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." + ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000556/v-2.1.1", + "usability_domain": [ + "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlycoMotif (https://glycomotif.glyomics.org/). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281) and the associated PMIDs are derived from the Glycan Motif dataset (https://data.glygen.org/GLY_000283)." 
+ ], + "score": { + "usability_domain_length": 357 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000081/v-2.0.2", + "usability_domain": [ + "The Human Gene Locus (Ensembl) contains human [taxid:9606] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 356 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000081/v-2.1.1", + "usability_domain": [ + "The Human Gene Locus (Ensembl) contains human [taxid:9606] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 356 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000318/v-2.0.2", + "usability_domain": [ + "The Rat Protein PTM Annotation (UniProtKB) dataset contains rat [taxid:10116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 355 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000328/v-2.0.2", + "usability_domain": [ + "The Rat Protein Site Annotation (UniProtKB) dataset contains rat [taxid:10116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. 
If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 355 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000318/v-2.1.1", + "usability_domain": [ + "The Rat Protein PTM Annotation (UniProtKB) dataset contains rat [taxid:10116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 355 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000328/v-2.1.1", + "usability_domain": [ + "The Rat Protein Site Annotation (UniProtKB) dataset contains rat [taxid:10116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 355 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000348/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Isoform sequences (UniProtKB) dataset contains hcv1a [taxid:11108] protein fasta sequences for the hcv1a isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 354 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000352/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) EBI-UniProtKB NT file contains proteome data for hcv1a [taxid:11108] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. 
The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 354 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000348/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Isoform sequences (UniProtKB) dataset contains hcv1a [taxid:11108] protein fasta sequences for the hcv1a isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 354 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000352/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) EBI-UniProtKB NT file contains proteome data for hcv1a [taxid:11108] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 354 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000437/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Names (NCBI RefSeq) dataset contains sarscov1 [taxid:694009] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 353 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000437/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Names (NCBI RefSeq) dataset contains sarscov1 [taxid:694009] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 353 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000139/1.0", + "usability_domain": [ + "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000139/1.0", + "usability_domain": [ + "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000139/1.0", + "usability_domain": [ + "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. 
The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000139/1.0", + "usability_domain": [ + "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000139/1.0", + "usability_domain": [ + "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000362/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Gene Ontology (GO) dataset contains hcv1b [taxid:11116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000350/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Sequences Info (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB protein sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000350/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Sequences Info (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB protein sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000362/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Gene Ontology (GO) dataset contains hcv1b [taxid:11116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 352 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000022/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Information (NCBI RefSeq) dataset contains mouse [taxid:10090] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. 
The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000692/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references status to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). The dataset allows to know whether a given GlyGen GlyTouCan has PubChem mapping or not." + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000021/v-2.0.2", + "usability_domain": [ + "The Human Protein Information (NCBI RefSeq) dataset contains human [taxid:9606] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000021/v-2.1.1", + "usability_domain": [ + "The Human Protein Information (NCBI RefSeq) dataset contains human [taxid:9606] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000022/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Information (NCBI RefSeq) dataset contains mouse [taxid:10090] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000692/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references status to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). The dataset allows to know whether a given GlyGen GlyTouCan has PubChem mapping or not." + ], + "score": { + "usability_domain_length": 351 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000430/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Ontology (GO) dataset contains sarscov2 [taxid:2697049] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 350 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000430/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Ontology (GO) dataset contains sarscov2 [taxid:2697049] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. 
The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 350 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000021/1.1", + "usability_domain": [ + "List of ngsQC data from Pond Lab", + "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using Pond Lab's NGS QC workflows constructed in Galaxy. The primary use case for this data set is to explore ngsQC results." + ], + "score": { + "usability_domain_length": 349 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000622/v-2.0.2", + "usability_domain": [ + "This dataset contains NCBI PubMed IDs associated with all of GlyGen's proteins and glycans with their corresponding GlyGen publication detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID. The dataset is for all GlyGen's organisms and the output file is split into three files." + ], + "score": { + "usability_domain_length": 348 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000440/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Sequence Info (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 348 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000440/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Sequence Info (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 348 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000622/v-2.1.1", + "usability_domain": [ + "This dataset contains NCBI PubMed IDs associated with all of GlyGen's proteins and glycans with their corresponding GlyGen publication detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID. The dataset is for all GlyGen's organisms and the output file is split into three files." + ], + "score": { + "usability_domain_length": 348 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000275/v-2.0.2", + "usability_domain": [ + "The Rat Protein Information (NCBI RefSeq) dataset contains rat [taxid:10116] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000411/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Isoform sequences (UniProtKB) dataset contains sarscov1 [taxid:694009] protein fasta sequences for the sarscov1 isoform accessions from the UniProtKB database. 
The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000469/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1b [taxid:11116] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000275/v-2.1.1", + "usability_domain": [ + "The Rat Protein Information (NCBI RefSeq) dataset contains rat [taxid:10116] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000411/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Isoform sequences (UniProtKB) dataset contains sarscov1 [taxid:694009] protein fasta sequences for the sarscov1 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000469/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1b [taxid:11116] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 347 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000418/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Citations dataset contains publication information for sarscov1 [taxid:694009] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 346 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000672/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Reactome contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 346 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000418/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Citations dataset contains publication information for sarscov1 [taxid:694009] UniProtKB accessions that includes PMID, journal name, date and author and title. 
The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 346 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000672/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Reactome contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 346 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000422/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov2 [taxid:2697049] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 345 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000361/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Gene Ontology (GO) dataset contains hcv1a [taxid:11108] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 345 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000361/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Gene Ontology (GO) dataset contains hcv1a [taxid:11108] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 345 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000422/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov2 [taxid:2697049] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 345 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000035/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", + "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 344 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000030/1.0", + "usability_domain": [ + "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", + "This set was chosen based on UniProt curation emphasis and community use. 
The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 344 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000030/1.2", + "usability_domain": [ + "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", + "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "score": { + "usability_domain_length": 344 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000435/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) EBI-UniProtKB NT file contains proteome data for sarscov1 [taxid:694009] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 344 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000435/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) EBI-UniProtKB NT file contains proteome data for sarscov1 [taxid:694009] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 344 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000406/v-2.0.2", + "usability_domain": [ + "The Mouse NCBI Protein Linkouts contains mouse [taxid:10090] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000523/v-2.0.2", + "usability_domain": [ + "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000281/v-2.0.2", + "usability_domain": [ + "The dataset provides information on the glycan properties such as mass, permethylated mass, topology, base composition, composition and the number of monosaccharides for the associated glycan (GlyTouCan Accession). This dataset is the master-list of the GlyTouCan accessions currently present in GlyGen. Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000281/v-2.1.1", + "usability_domain": [ + "The dataset provides information on the glycan properties such as mass, permethylated mass, topology, base composition, composition and the number of monosaccharides for the associated glycan (GlyTouCan Accession). 
This dataset is the master-list of the GlyTouCan accessions currently present in GlyGen. Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000406/v-2.1.1", + "usability_domain": [ + "The Mouse NCBI Protein Linkouts contains mouse [taxid:10090] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000523/v-2.1.1", + "usability_domain": [ + "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 343 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000405/v-2.0.2", + "usability_domain": [ + "The Human NCBI Protein Linkouts contains human [taxid:9606] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 342 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000405/v-2.1.1", + "usability_domain": [ + "The Human NCBI Protein Linkouts contains human [taxid:9606] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. 
linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 342 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000224/v-2.0.2", + "usability_domain": [ + "The Rat Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 341 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000039/v-2.0.2", + "usability_domain": [ + "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 341 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000039/v-2.1.1", + "usability_domain": [ + "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 341 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000224/v-2.1.1", + "usability_domain": [ + "The Rat Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 341 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000038/v-2.0.2", + "usability_domain": [ + "The Human Glycosylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 340 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000468/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1a [taxid:11108] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 340 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000038/v-2.1.1", + "usability_domain": [ + "The Human Glycosylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 340 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000468/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1a [taxid:11108] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 340 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000347/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Canonical (UniProtKB) Sequences dataset contains hcv1b [taxid:11116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000407/v-2.0.2", + "usability_domain": [ + "The Rat NCBI Protein Linkouts contains rat [taxid:10116] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000244/v-2.0.2", + "usability_domain": [ + "The Rat Proteome Master list dataset contains rat [taxid:10116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000096/1.0.36", + "usability_domain": [ + "A0400 is a panel of identified protein prognostic biomarkers (differential expression) and a gene prognostic biomarker (mutation) in colorectal cancer. The proteins are MLH1 (UPKB:P40692), MSH2 (UPKB:P43246), MSH6 (UPKB:P52701), PMS2 (UPKB:P54278) and the gene is BRAF (UPKB:P15056). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000244/v-2.1.1", + "usability_domain": [ + "The Rat Proteome Master list dataset contains rat [taxid:10116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000347/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Canonical (UniProtKB) Sequences dataset contains hcv1b [taxid:11116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000407/v-2.1.1", + "usability_domain": [ + "The Rat NCBI Protein Linkouts contains rat [taxid:10116] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. 
linkouts guidelines where 10277 is the GlyGen's provider ID.", + "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" + ], + "score": { + "usability_domain_length": 339 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000001/v-2.0.2", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000460/v-2.0.2", + "usability_domain": [ + "The Mouse Germline Mutation dataset contains mouse [taxid:10090] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000001/2.0.3", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000001/2.1.1", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. 
The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000001/v-2.1.1", + "usability_domain": [ + "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000460/v-2.1.1", + "usability_domain": [ + "The Mouse Germline Mutation dataset contains mouse [taxid:10090] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 338 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000492/v-2.0.2", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000459/v-2.0.2", + "usability_domain": [ + "The Human Germline Mutation dataset contains human [taxid:9606] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. 
The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000416/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Canonical (UniProtKB) Sequences dataset contains sarscov2 [taxid:2697049] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000643/v-2.0.2", + "usability_domain": [ + "The Fruitfly N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for fruitfly [taxid:7227] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. " + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000024/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] differentially expressed miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed miRNA in cancer samples. miRNAs are mapped to RefSeq transcripts, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000416/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Canonical (UniProtKB) Sequences dataset contains sarscov2 [taxid:2697049] canonical protein fasta sequences from the UniProtKB database. 
The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000459/v-2.1.1", + "usability_domain": [ + "The Human Germline Mutation dataset contains human [taxid:9606] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000492/v-2.1.1", + "usability_domain": [ + "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000643/v-2.1.1", + "usability_domain": [ + "The Fruitfly N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for fruitfly [taxid:7227] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. 
" + ], + "score": { + "usability_domain_length": 337 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000686/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000429/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Gene Ontology (GO) dataset contains sarscov1 [taxid:694009] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000481/v-2.0.2", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000429/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Gene Ontology (GO) dataset contains sarscov1 [taxid:694009] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000481/v-2.1.1", + "usability_domain": [ + "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000686/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen" + ], + "score": { + "usability_domain_length": 336 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000493/v-2.0.2", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 335 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000461/v-2.0.2", + "usability_domain": [ + "The Rat Germline Mutation dataset contains rat [taxid:10116] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. 
The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB. " + ], + "score": { + "usability_domain_length": 335 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000461/v-2.1.1", + "usability_domain": [ + "The Rat Germline Mutation dataset contains rat [taxid:10116] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB. " + ], + "score": { + "usability_domain_length": 335 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000493/v-2.1.1", + "usability_domain": [ + "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" + ], + "score": { + "usability_domain_length": 335 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000008/1.2", + "usability_domain": [ + "Assembly metadata extracted from NCBI BioProjects.", + "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000008/1.3", + "usability_domain": [ + "Assembly metadata extracted from NCBI BioProjects.", + "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. 
The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000008/1.4", + "usability_domain": [ + "Assembly metadata extracted from NCBI BioProjects.", + "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000462/v-2.0.2", + "usability_domain": [ + "The Human Somatic Mutation dataset contains human [taxid:9606] somatic mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) somatic mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000646/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Information dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000303/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to KEGG glycan ID(s) for the associated glycan (GlyTouCan Accession). 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.genome.jp/kegg/glycan/" + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000230/v-2.0.2", + "usability_domain": [ + "The Human Congenital Disorders of Glycosylation dataset contains a list of congenital disorders of glycosylation mapped to human [taxid:9606] UniProtKB accessions. The dataset also contains mouse orthologs and links to phenotype page on MGI. If you use this dataset please provide proper attribution to Monarch Initiative and GlyGen. " + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000230/v-2.1.1", + "usability_domain": [ + "The Human Congenital Disorders of Glycosylation dataset contains a list of congenital disorders of glycosylation mapped to human [taxid:9606] UniProtKB accessions. The dataset also contains mouse orthologs and links to phenotype page on MGI. If you use this dataset please provide proper attribution to Monarch Initiative and GlyGen. " + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000303/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to KEGG glycan ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.genome.jp/kegg/glycan/" + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000462/v-2.1.1", + "usability_domain": [ + "The Human Somatic Mutation dataset contains human [taxid:9606] somatic mutation data from EBI Variant Data API. 
Only nonsynonymous single-nucleotide variations (nsSNVs) somatic mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000646/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Information dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 334 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000346/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Canonical (UniProtKB) Sequences dataset contains hcv1a [taxid:11108] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 332 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000302/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to GLYCOSCIENCES.DE ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;http://www.glycosciences.de/" + ], + "score": { + "usability_domain_length": 332 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000302/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to GLYCOSCIENCES.DE ID(s) for the associated glycan (GlyTouCan Accession). 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;http://www.glycosciences.de/" + ], + "score": { + "usability_domain_length": 332 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000346/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Canonical (UniProtKB) Sequences dataset contains hcv1a [taxid:11108] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 332 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000524/v-2.0.2", + "usability_domain": [ + "The Rat Glycation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000421/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov1 [taxid:694009] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000690/v-2.0.2", + "usability_domain": [ + "This dataset contains fruitfly [taxid:7227] pathway information from Reactome database and mapped to the UniProtKB canonical accessions. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen", + "Reactome is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000687/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Reactions (Rhea) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants ID, reaction ID, pathway ID, summary etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen\"" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000421/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov1 [taxid:694009] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000524/v-2.1.1", + "usability_domain": [ + "The Rat Glycation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000687/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Reactions (Rhea) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants ID, reaction ID, pathway ID, summary etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen\"" + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000690/v-2.1.1", + "usability_domain": [ + "This dataset contains fruitfly [taxid:7227] pathway information from Reactome database and mapped to the UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen", + "Reactome is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 331 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000034/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Information dataset contains mouse [taxid:10090] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 329 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000034/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Information dataset contains mouse [taxid:10090] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019_09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 329 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000019/1.0", + "usability_domain": [ + "List of ngsQC data from HIVE Lab.", + "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results." + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000019/1.1", + "usability_domain": [ + "List of ngsQC data from HIVE Lab.", + "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results." + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000033/v-2.0.2", + "usability_domain": [ + "The Human Protein Information dataset contains human [taxid:9606] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000306/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to UniCarbDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://unicarb-db.expasy.org/" + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000033/v-2.1.1", + "usability_domain": [ + "The Human Protein Information dataset contains human [taxid:9606] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000306/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to UniCarbDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://unicarb-db.expasy.org/" + ], + "score": { + "usability_domain_length": 328 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000301/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to GlyConnect ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;https://glyconnect.expasy.org" + ], + "score": { + "usability_domain_length": 327 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000241/v-2.0.2", + "usability_domain": [ + "The Rat Protein Information dataset contains rat [taxid:10116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 327 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000241/v-2.1.1", + "usability_domain": [ + "The Rat Protein Information dataset contains rat [taxid:10116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 327 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000301/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to GlyConnect ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;https://glyconnect.expasy.org" + ], + "score": { + "usability_domain_length": 327 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000133/v-2.0.2", + "usability_domain": [ + "The Human Protein Function (NCBI RefSeq) dataset contains biological function annotation of human [taxid:9606] genes from GeneRIF section in the NCBI RefSeq database. The dataset is downloaded from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 326 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000133/v-2.1.1", + "usability_domain": [ + "The Human Protein Function (NCBI RefSeq) dataset contains biological function annotation of human [taxid:9606] genes from GeneRIF section in the NCBI RefSeq database. The dataset is downloaded from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 326 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000134/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Function (NCBI RefSeq) dataset contains biological function annotation of mouse [taxid:10090] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 324 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000134/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Function (NCBI RefSeq) dataset contains biological function annotation of mouse [taxid:10090] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 324 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000058/1.0", + "usability_domain": [ + "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000307/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to UniCarbKB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/;http://www.glycome-db.org/; http://www.unicarbkb.org/" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000620/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000415/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Canonical (UniProtKB) Sequences dataset contains sarscov1 [taxid:694009] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000107/1.0.36", + "usability_domain": [ + "A0411 is the 23andMe PGS Genetic Health Risk Report for MUTYHAssociated Polyposis is indicated for reporting of the Y179C and the G396D variants in the MUTYH gene. The report describes if a person is at increased risk of developing colorectal cancer. [FTCID:K182784]. This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000307/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to UniCarbKB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/;http://www.glycome-db.org/; http://www.unicarbkb.org/" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000415/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Canonical (UniProtKB) Sequences dataset contains sarscov1 [taxid:694009] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000620/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" + ], + "score": { + "usability_domain_length": 323 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000394/v-2.0.2", + "usability_domain": [ + "The Rat Protein Names (NCBI RefSeq) dataset contains rat [taxid:10116] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 321 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000394/v-2.1.1", + "usability_domain": [ + "The Rat Protein Names (NCBI RefSeq) dataset contains rat [taxid:10116] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 321 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000249/v-2.0.2", + "usability_domain": [ + "The Rat Protein Function (NCBI RefSeq) dataset contains biological function annotation of rat [taxid:10116] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 320 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000399/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Sequence Info (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 320 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000249/v-2.1.1", + "usability_domain": [ + "The Rat Protein Function (NCBI RefSeq) dataset contains biological function annotation of rat [taxid:10116] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " + ], + "score": { + "usability_domain_length": 320 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000399/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Sequence Info (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 320 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000025/1.1", + "usability_domain": [ + "List of ngsQC data from Crandall Lab", + "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as Salmonella. ngsQC was carried out using HIVE1's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. " + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000295/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to CarbBank ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.genome.jp/dbget-bin/www_bfind?carbbank" + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000463/v-2.0.2", + "usability_domain": [ + "The Human Literature Mutation dataset contains human [taxid:9606] mutation data extracted from literature using the DIMEX tool. The dataset contains only the mutations that have amino acid information and position in the mutation mention column.. If you use this dataset provide proper attribution to GlyGen and OncoMX." + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000398/v-2.0.2", + "usability_domain": [ + "The Human Protein Sequence Info (UniProtKB) dataset contains human [taxid:9606] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000295/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to CarbBank ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.genome.jp/dbget-bin/www_bfind?carbbank" + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000398/v-2.1.1", + "usability_domain": [ + "The Human Protein Sequence Info (UniProtKB) dataset contains human [taxid:9606] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000463/v-2.1.1", + "usability_domain": [ + "The Human Literature Mutation dataset contains human [taxid:9606] mutation data extracted from literature using the DIMEX tool. The dataset contains only the mutations that have amino acid information and position in the mutation mention column.. If you use this dataset provide proper attribution to GlyGen and OncoMX." + ], + "score": { + "usability_domain_length": 319 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000650/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein PTM Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. 
If you use this dataset please give proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 318 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000058/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] biomarkers to measure thrombosis in cancer patients. This file contains 12 biomarkers extracted from the CSSI blood biomarkers retrieved fro the CSSI portal. There are 12 biomarkers to measure thrombosis. https://cssi-dcc.nci.nih.gov/cssiportal/ and https://cssi.cancer.gov/cancer-thrombosis" + ], + "score": { + "usability_domain_length": 318 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000650/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein PTM Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. If you use this dataset please give proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 318 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000025/1.0", + "usability_domain": [ + "List of ngsQC data from Crandall Lab", + "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as Salmonella. ngsQC was carried out using HIVE1's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results" + ], + "score": { + "usability_domain_length": 317 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000013/v-2.0.2", + "usability_domain": [ + "The Mouse EBI-UniProtKB NT file contains proteome data for mouse [taxid:10090] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 317 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000013/v-2.1.1", + "usability_domain": [ + "The Mouse EBI-UniProtKB NT file contains proteome data for mouse [taxid:10090] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 317 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000358/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1b [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000400/v-2.0.2", + "usability_domain": [ + "The Rat Protein Sequence Info (UniProtKB) dataset contains rat [taxid:10116] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000053/v-2.0.2", + "usability_domain": [ + "The Human Protein Isoform sequences (UniProtKB) dataset contains human [taxid:9606] protein fasta sequences for the human isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000054/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Isoform sequences (UniProtKB) dataset contains mouse [taxid:10090] protein fasta sequences for the mouse isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000053/v-2.1.1", + "usability_domain": [ + "The Human Protein Isoform sequences (UniProtKB) dataset contains human [taxid:9606] protein fasta sequences for the human isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000054/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Isoform sequences (UniProtKB) dataset contains mouse [taxid:10090] protein fasta sequences for the mouse isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000358/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1b [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000400/v-2.1.1", + "usability_domain": [ + "The Rat Protein Sequence Info (UniProtKB) dataset contains rat [taxid:10116] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 316 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000360/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1b [taxid:11116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 315 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000003/v-2.0.2", + "usability_domain": [ + "The Human EBI-UniProtKB NT file contains proteome data for human [taxid:9606] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 315 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000003/v-2.1.1", + "usability_domain": [ + "The Human EBI-UniProtKB NT file contains proteome data for human [taxid:9606] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 315 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000360/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1b [taxid:11116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 315 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000674/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref FlyBase contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to FlyBase database accessions/identifiers. If you use this dataset please provide proper attribution to FlyBase and GlyGen", + "FlyBase is a database for Drosophila gene and genomes. https://flybase.org/" + ], + "score": { + "usability_domain_length": 314 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000439/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 314 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000439/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 314 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000674/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref FlyBase contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to FlyBase database accessions/identifiers. If you use this dataset please provide proper attribution to FlyBase and GlyGen", + "FlyBase is a database for Drosophila gene and genomes. https://flybase.org/" + ], + "score": { + "usability_domain_length": 314 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.0", + "usability_domain": [ + "QC of short read sequences assembled into genomes.", + "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.1", + "usability_domain": [ + "QC of short read sequences assembled into genomes.", + "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.2", + "usability_domain": [ + "QC of short read sequences assembled into genomes.", + "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." 
+ ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000012/1.4", + "usability_domain": [ + "QC of short read sequences assembled into genomes.", + "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000496/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000304/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to PDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.rcsb.org/" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000388/v-2.0.2", + "usability_domain": [ + "The Mouse Gene Symbols (NCBI RefSeq) dataset contains mouse [taxid:10090] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000304/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to PDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.rcsb.org/" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000388/v-2.1.1", + "usability_domain": [ + "The Mouse Gene Symbols (NCBI RefSeq) dataset contains mouse [taxid:10090] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000496/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 313 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000255/v-2.0.2", + "usability_domain": [ + "The Rat Protein Isoform sequences (UniProtKB) dataset contains rat [taxid:10116] protein fasta sequences for the rat isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000252/v-2.0.2", + "usability_domain": [ + "The Mouse Gene Ontology (GO) dataset contains mouse [taxid:10090] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000278/v-2.0.2", + "usability_domain": [ + "The Rat EBI-UniProtKB NT file contains proteome data for rat [taxid:10116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000387/v-2.0.2", + "usability_domain": [ + "The Human Gene Symbols (NCBI RefSeq) dataset contains human [taxid:9606] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000252/v-2.1.1", + "usability_domain": [ + "The Mouse Gene Ontology (GO) dataset contains mouse [taxid:10090] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000255/v-2.1.1", + "usability_domain": [ + "The Rat Protein Isoform sequences (UniProtKB) dataset contains rat [taxid:10116] protein fasta sequences for the rat isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000278/v-2.1.1", + "usability_domain": [ + "The Rat EBI-UniProtKB NT file contains proteome data for rat [taxid:10116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000387/v-2.1.1", + "usability_domain": [ + "The Human Gene Symbols (NCBI RefSeq) dataset contains human [taxid:9606] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 312 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000258/v-2.0.2", + "usability_domain": [ + "The Homolog Clusters dataset contains homologs of human [taxid: 9606], mouse [taxid:10090] and rat [taxid:10116] species mapped to UniProtKB canonical accessions. The homologs data is downloaded from OMA browser and MGI. If you use this dataset please provide proper attribution to MGI, OMA Browser and GlyGen. 
" + ], + "score": { + "usability_domain_length": 311 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000258/v-2.1.1", + "usability_domain": [ + "The Homolog Clusters dataset contains homologs of human [taxid: 9606], mouse [taxid:10090] and rat [taxid:10116] species mapped to UniProtKB canonical accessions. The homologs data is downloaded from OMA browser and MGI. If you use this dataset please provide proper attribution to MGI, OMA Browser and GlyGen. " + ], + "score": { + "usability_domain_length": 311 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000389/v-2.0.2", + "usability_domain": [ + "The Rat Gene Symbols (NCBI RefSeq) dataset contains rat [taxid:10116] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 310 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000654/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Site Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 310 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000389/v-2.1.1", + "usability_domain": [ + "The Rat Gene Symbols (NCBI RefSeq) dataset contains rat [taxid:10116] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. 
If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 310 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000654/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Site Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. If you use this dataset please give proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 310 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000212/2.9", + "usability_domain": [ + " \"Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\",", + " \"Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\"," + ], + "score": { + "usability_domain_length": 309 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000223/v-2.0.2", + "usability_domain": [ + "The Human Gene Ontology (GO) dataset contains human [taxid:9606] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 309 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000357/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 309 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000223/v-2.1.1", + "usability_domain": [ + "The Human Gene Ontology (GO) dataset contains human [taxid:9606] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 309 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000357/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 309 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000359/v-2.0.2", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000682/v-2.0.2", + "usability_domain": [ + "The Fruitfly Phosphorylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000285/v-2.0.2", + "usability_domain": [ + "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlyTouCan and UniCarbKB database.The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.unicarbkb.org/" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000242/v-2.0.2", + "usability_domain": [ + "The Rat Gene Ontology (GO) dataset contains rat [taxid:10116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000242/v-2.1.1", + "usability_domain": [ + "The Rat Gene Ontology (GO) dataset contains rat [taxid:10116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000285/v-2.1.1", + "usability_domain": [ + "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlyTouCan and UniCarbKB database.The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/; http://www.unicarbkb.org/" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000359/v-2.1.1", + "usability_domain": [ + "The Hepatitis C virus (genotype 1a, isolate H) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000682/v-2.1.1", + "usability_domain": [ + "The Fruitfly Phosphorylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 308 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000282/v-2.0.2", + "usability_domain": [ + "The dataset provides information on the glycan classification such as glycan type and sub-type for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 307 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000282/v-2.1.1", + "usability_domain": [ + "The dataset provides information on the glycan classification such as glycan type and sub-type for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 307 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000132/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Function (UniProtKB) dataset contains biological function annotation for mouse [taxid:10090] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 306 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000021/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] genes with cancer data in BioMuta and/or BioXpress - This file contains human [taxid:9606] genes with associated cancer data in BioMuta and/or BioXpress cancer mutation and expression databases. Genes are mapped to canonical UniProtKB/Swiss-Prot accessions and RefSeq accession. " + ], + "score": { + "usability_domain_length": 306 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000132/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Function (UniProtKB) dataset contains biological function annotation for mouse [taxid:10090] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 306 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000294/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to BCSDB ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/; http://csdb.glycoscience.ru/bacterial/" + ], + "score": { + "usability_domain_length": 305 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000131/v-2.0.2", + "usability_domain": [ + "The Human Protein Function (UniProtKB) dataset contains biological function annotation for human [taxid:9606] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 305 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000039/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] genes with cancer data in BioMuta and/or BioXpress - This file contains human [taxid:9606] genes with associated cancer data in BioMuta and/or BioXpress cancer mutation and expression databases. Genes are mapped to canonical UniProtKB/Swiss-Prot accessions and RefSeq accession." + ], + "score": { + "usability_domain_length": 305 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000131/v-2.1.1", + "usability_domain": [ + "The Human Protein Function (UniProtKB) dataset contains biological function annotation for human [taxid:9606] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 305 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000294/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to BCSDB ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/; http://csdb.glycoscience.ru/bacterial/" + ], + "score": { + "usability_domain_length": 305 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000247/v-2.0.2", + "usability_domain": [ + "The Rat Protein Function (UniProtKB) dataset contains biological functional annotations for rat [taxid:10116] protein accessions from the UniProtKB database. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 303 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000247/v-2.1.1", + "usability_domain": [ + "The Rat Protein Function (UniProtKB) dataset contains biological functional annotations for rat [taxid:10116] protein accessions from the UniProtKB database. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 303 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000478/v-2.0.2", + "usability_domain": [ + "The dataset provides relationship between the glycans (identified by the GlyTouCan accession). The relation is categorized as either Ancestor, Descendant, SubsumedBy or Subsumes. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." + ], + "score": { + "usability_domain_length": 302 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000478/v-2.1.1", + "usability_domain": [ + "The dataset provides relationship between the glycans (identified by the GlyTouCan accession). The relation is categorized as either Ancestor, Descendant, SubsumedBy or Subsumes. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." 
+ ], + "score": { + "usability_domain_length": 302 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000033/1.0.25", + "usability_domain": [ + "The glycotyper dataset displays normalized intensity values detected and quantified by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for N-glycans of specific serum proteins across Hepatocellular carcinoma and other patient cohorts such as transplant and cirrhosis." + ], + "score": { + "usability_domain_length": 301 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000046/1.0.36", + "usability_domain": [ + "The glycotyper dataset displays normalized intensity values detected and quantified by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for N-glycans of specific serum proteins across Hepatocellular carcinoma and other patient cohorts such as transplant and cirrhosis." + ], + "score": { + "usability_domain_length": 301 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000438/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 300 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000438/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 300 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000413/v-2.0.2", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000298/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to CFG ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.functionalglycomics.org/" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000598/v-2.0.2", + "usability_domain": [ + "The Mouse UniProtKB Xref Rhea contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Rhea" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000097/1.0.36", + "usability_domain": [ + "A0401 is a panel for PCA3 score of measured protein mRNA level ratio as predictive biomarker for prostate cancer patients. A PCA3 Score <25 is associated with a decreased likelihood of a positive biopsy. Prostatic biopsy is required for diagnosis of cancer. The proteins are PCA3, PSA (UPKB:P07288)." 
+ ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000298/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to CFG ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.functionalglycomics.org/" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000413/v-2.1.1", + "usability_domain": [ + "The SARS coronavirus (SARS-CoV-1) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000598/v-2.1.1", + "usability_domain": [ + "The Mouse UniProtKB Xref Rhea contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Rhea" + ], + "score": { + "usability_domain_length": 299 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000597/v-2.0.2", + "usability_domain": [ + "The Human UniProtKB Xref Rhea contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
", + "Rhea" + ], + "score": { + "usability_domain_length": 298 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000597/v-2.1.1", + "usability_domain": [ + "The Human UniProtKB Xref Rhea contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Rhea" + ], + "score": { + "usability_domain_length": 298 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000337/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to GlycoEpitope for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.glycoepitope.jp/" + ], + "score": { + "usability_domain_length": 296 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000009/1.0.25", + "usability_domain": [ + "Human Cancer Disease Ontology Slim IDs (V2.1) directly mapped to UBERON Anatomical Entity IDs - The csv file human_doid_uberon_mapping.csv contains slim-level Disease Ontology IDs (DOIDs) from the Cancer DO Slim project mapped to the corresponding tissue in the UBERON Anatomical Entity Ontology." + ], + "score": { + "usability_domain_length": 296 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000337/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to GlycoEpitope for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/; https://www.glycoepitope.jp/" + ], + "score": { + "usability_domain_length": 296 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000525/v-2.0.2", + "usability_domain": [ + "The Human Disease Ontology Mapping dataset contains human [taxid:9606] UniProtKB canonical accession cross-reference mappings to the Disease Ontology Ids (DOID)", + "If you use this dataset please provide proper attribution to GlyGen and Disease ontology, Monarch Disease Ontology and EMBL-EBI-UniProt" + ], + "score": { + "usability_domain_length": 295 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000086/1.0.36", + "usability_domain": [ + "A0390 is a biomarker panel of identified gene predictive biomarkers (mutations) in ovarian cancer (DOID:2394)patients with deleterious or suspected deleterious germline BRCA variants. The genes are BRCA1 (UPKB:P38398), BRCA2 (UPKB:P51587). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 295 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000525/v-2.1.1", + "usability_domain": [ + "The Human Disease Ontology Mapping dataset contains human [taxid:9606] UniProtKB canonical accession cross-reference mappings to the Disease Ontology Ids (DOID)", + "If you use this dataset please provide proper attribution to GlyGen and Disease ontology, Monarch Disease Ontology and EMBL-EBI-UniProt" + ], + "score": { + "usability_domain_length": 295 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000470/1.0", + "usability_domain": [ + "This BCO draft was generated by the Nextflow engine, and revised at the BCO Portal. 
Additional documentation about the Nextflow-BCO connection is here: https://github.com/nextflow-io/nf-prov\n\nAdditional documentation about the RNAseq-NF pipeline is here:\nhttps://github.com/nextflow-io/rnaseq-nf" + ], + "score": { + "usability_domain_length": 295 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000002/v-2.0.2", + "usability_domain": [ + "The Human Protein Canonical (UniProtKB) Sequences dataset contains human [taxid:9606] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 294 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000002/v-2.1.1", + "usability_domain": [ + "The Human Protein Canonical (UniProtKB) Sequences dataset contains human [taxid:9606] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 294 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000684/v-2.0.2", + "usability_domain": [ + "The Fruitfly Glycosyltransferases dataset contains a list of fruitfly [taxid:7227] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000599/v-2.0.2", + "usability_domain": [ + "The Rat UniProtKB Xref Rhea contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Rhea" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000338/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to MatrixDB for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://matrixdb.univ-lyon1.fr/" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000338/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to MatrixDB for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://matrixdb.univ-lyon1.fr/" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000599/v-2.1.1", + "usability_domain": [ + "The Rat UniProtKB Xref Rhea contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", + "Rhea" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000684/v-2.1.1", + "usability_domain": [ + "The Fruitfly Glycosyltransferases dataset contains a list of fruitfly [taxid:7227] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 293 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000333/v-2.0.2", + "usability_domain": [ + "The Mouse Interaction data (MatrixDB) dataset contains interaction data between Mouse [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 292 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000333/v-2.1.1", + "usability_domain": [ + "The Mouse Interaction data (MatrixDB) dataset contains interaction data between Mouse [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 292 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000240/v-2.0.2", + "usability_domain": [ + "The Rat Protein Canonical Sequences (UniProtKB) dataset contains rat [taxid:10116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000332/v-2.0.2", + "usability_domain": [ + "The Human Interaction data (MatrixDB) dataset contains interaction data between Human [taxid:9606] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000681/v-2.0.2", + "usability_domain": [ + "The Fruitfly Glycosylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000095/1.0.36", + "usability_domain": [ + "A0399 Panel of identified chromosome somatic mutants (aneuploidy) and locus mutant (loss) monitoring biomarkers in urinary bladder (DOID:11054) cancer. The chromosomes are chromosme 3, chromosme 7, chromosme 17, and the locus is 9p21. This panel is curated from the FDA Approved biomarkers" + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000240/v-2.1.1", + "usability_domain": [ + "The Rat Protein Canonical Sequences (UniProtKB) dataset contains rat [taxid:10116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000332/v-2.1.1", + "usability_domain": [ + "The Human Interaction data (MatrixDB) dataset contains interaction data between Human [taxid:9606] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000681/v-2.1.1", + "usability_domain": [ + "The Fruitfly Glycosylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 291 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000030/v-2.0.2", + "usability_domain": [ + "The Mouse Glycosyltransferases dataset contains a list of mouse [taxid:10090] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 289 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000030/v-2.1.1", + "usability_domain": [ + "The Mouse Glycosyltransferases dataset contains a list of mouse [taxid:10090] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. 
If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 289 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000334/v-2.0.2", + "usability_domain": [ + "The Rat Interaction data (MatrixDB) dataset contains interaction data between Rat [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 288 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000477/v-2.0.2", + "usability_domain": [ + "The dataset provides details (such as image size, image notation, image style, image format)for the glycan (identified by GlyTouCan accession) images used in GlyGen. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" + ], + "score": { + "usability_domain_length": 288 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000334/v-2.1.1", + "usability_domain": [ + "The Rat Interaction data (MatrixDB) dataset contains interaction data between Rat [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." + ], + "score": { + "usability_domain_length": 288 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000477/v-2.1.1", + "usability_domain": [ + "The dataset provides details (such as image size, image notation, image style, image format)for the glycan (identified by GlyTouCan accession) images used in GlyGen. 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" + ], + "score": { + "usability_domain_length": 288 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000280/v-2.0.2", + "usability_domain": [ + "The Rat Glycohydrolases dataset contains a list of rat [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 285 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000280/v-2.1.1", + "usability_domain": [ + "The Rat Glycohydrolases dataset contains a list of rat [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 285 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000685/v-2.0.2", + "usability_domain": [ + "The Fruitfly Glycohydrolases dataset contains a list of fruitfly [taxid:7227] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 283 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000685/v-2.1.1", + "usability_domain": [ + "The Fruitfly Glycohydrolases dataset contains a list of fruitfly [taxid:7227] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 283 + } + }, + { + "object_id": 
"https://biocomputeobject.org/GLY_000711/v-2.1.1", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." + ], + "score": { + "usability_domain_length": 282 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000025/v-2.0.2", + "usability_domain": [ + "The Human Glycohydrolases dataset contains a list of human [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 280 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000292/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in WURCS extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 280 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000290/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in IUPAC extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 280 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000025/v-2.1.1", + "usability_domain": [ + "The Human Glycohydrolases dataset contains a list of human [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 280 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000283/v-2.1.1", + "usability_domain": [ + "The dataset provides information on the glycan motifs for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281) and from GlycoMotif (https://glycomotif.glyomics.org/)." + ], + "score": { + "usability_domain_length": 280 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000026/v-2.0.2", + "usability_domain": [ + "The Mouse Glycohydrolases dataset contains a list of mouse [taxid:10090] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 279 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000287/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in Glycam IUPAC format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://dev.glycam.org/" + ], + "score": { + "usability_domain_length": 279 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000026/v-2.1.1", + "usability_domain": [ + "The Mouse Glycohydrolases dataset contains a list of mouse [taxid:10090] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 279 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000290/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in IUPAC extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 279 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000292/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in WURCS extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 279 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000046/1.0", + "usability_domain": [ + "A SARS-CoV-2 complete genome fasta file was derived from UniProt (UniProt ID: UP000464024), processed on the Argos Project server, and will be available on the front end data website for the Argos Project, data.argosdb.org. The complete genome contains information for 17 genes." 
+ ], + "score": { + "usability_domain_length": 278 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000287/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in Glycam IUPAC format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://dev.glycam.org/" + ], + "score": { + "usability_domain_length": 278 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000709/v-2.1.1", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." + ], + "score": { + "usability_domain_length": 277 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000708/v-2.1.1", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." + ], + "score": { + "usability_domain_length": 276 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000559/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in Byonic format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://dev.glycam.org/" + ], + "score": { + "usability_domain_length": 273 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000578/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to GPTWiki ID's for the associated glycan (GlyTouCan Accession). 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database:https://gptwiki.glyomics.org/" + ], + "score": { + "usability_domain_length": 273 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000288/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in GlycoCT format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 273 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000559/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in Byonic format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://dev.glycam.org/" + ], + "score": { + "usability_domain_length": 273 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000578/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to GPTWiki ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database:https://gptwiki.glyomics.org/" + ], + "score": { + "usability_domain_length": 273 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000511/v-2.0.2", + "usability_domain": [ + "The Human Proteoform Citations (Literature) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000143. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." 
+ ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000088/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000703/v-2.0.2", + "usability_domain": [ + "The dataset contains pathway data for all N-glycans which are fully mapped to the GlycoTree framework. Each pathway specifies details for a particular reaction catalyzed by enzymes (UniProtKB Accession) which effect the associated glycan structures (GlyTouCan Accessions)." + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000088/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000511/v-2.1.1", + "usability_domain": [ + "The Human Proteoform Citations (Literature) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000143. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000703/v-2.1.1", + "usability_domain": [ + "The dataset contains pathway data for all N-glycans which are fully mapped to the GlycoTree framework. 
Each pathway specifies details for a particular reaction catalyzed by enzymes (UniProtKB Accession) which effect the associated glycan structures (GlyTouCan Accessions)." + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000710/v-2.1.1", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO" + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000288/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in GlycoCT format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 272 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000648/v-2.0.2", + "usability_domain": [ + "The Fruitfly EBI-UniProtKB NT file contains proteome data for fruitfly [taxid:7227] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. T If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000032/v-2.0.2", + "usability_domain": [ + "The Mouse Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000087/v-2.0.2", + "usability_domain": [ + "The Human Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000032/v-2.1.1", + "usability_domain": [ + "The Mouse Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000087/v-2.1.1", + "usability_domain": [ + "The Human Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000648/v-2.1.1", + "usability_domain": [ + "The Fruitfly EBI-UniProtKB NT file contains proteome data for fruitfly [taxid:7227] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. 
T If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 271 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000222/v-2.0.2", + "usability_domain": [ + "The Rat Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for rat [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000652/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Sequence Info (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB protein fasta sequence information that includes sequence version and fasta header. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000031/v-2.0.2", + "usability_domain": [ + "The Human Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000110/1.0.36", + "usability_domain": [ + "A0435 is a Fibrosis vs Cirrhosis Biomarker Panel provided by Aswini Panigrahi from Georgetown University. This is a ratio of relative abundance of the di- and mono-sialylated O-glycoforms of HPX which shows a significant increase with the progression of liver disease. 
" + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000031/v-2.1.1", + "usability_domain": [ + "The Human Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000222/v-2.1.1", + "usability_domain": [ + "The Rat Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for rat [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000652/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Sequence Info (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB protein fasta sequence information that includes sequence version and fasta header. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 270 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000634/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Isoform sequences (UniProtKB) dataset contains fruitfly [taxid:7227] protein fasta sequences for the fruitfly isoform accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 269 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000579/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to Glycosmos ID's for the associated glycan (GlyTouCan Accession). 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glycosmos.org/" + ], + "score": { + "usability_domain_length": 269 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000579/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to Glycosmos ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glycosmos.org/" + ], + "score": { + "usability_domain_length": 269 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000634/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Isoform sequences (UniProtKB) dataset contains fruitfly [taxid:7227] protein fasta sequences for the fruitfly isoform accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 269 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000283/v-2.0.2", + "usability_domain": [ + "The dataset provides information on the glycan motifs for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 266 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000265/v-2.0.2", + "usability_domain": [ + "The Rat Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for rat [taxid:10116] proteins. The dataset is derived from 2019-05 UniProt release. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 265 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000265/v-2.1.1", + "usability_domain": [ + "The Rat Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for rat [taxid:10116] proteins. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 265 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000001/1.5", + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]" + ], + "score": { + "usability_domain_length": 264 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000013/1.0.25", + "usability_domain": [ + " List of mouse [taxid:10090] genes with normal RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx" + ], + "score": { + "usability_domain_length": 264 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000644/v-2.0.2", + "usability_domain": [ + "The Fruitfly Gene Ontology (GO) dataset contains fruitfly [taxid:7227] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. 
" + ], + "score": { + "usability_domain_length": 262 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000012/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] genes with normal RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx" + ], + "score": { + "usability_domain_length": 262 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000644/v-2.1.1", + "usability_domain": [ + "The Fruitfly Gene Ontology (GO) dataset contains fruitfly [taxid:7227] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 262 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000497/v-2.0.2", + "usability_domain": [ + "The dataset provides the byonic format of n-linked human glycans (identified by GlyTouCan Ac.). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/. " + ], + "score": { + "usability_domain_length": 261 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000497/v-2.1.1", + "usability_domain": [ + "The dataset provides the byonic format of n-linked human glycans (identified by GlyTouCan Ac.). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/. " + ], + "score": { + "usability_domain_length": 261 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000340/v-2.0.2", + "usability_domain": [ + "The dataset provides names for the associated glycan (GlyTouCan Accession) in different formats. 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 260 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000340/v-2.1.1", + "usability_domain": [ + "The dataset provides names for the associated glycan (GlyTouCan Accession) in different formats. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 260 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000649/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 258 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000649/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 258 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000715/v-2.1.1", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000711, contributed by Jungfeng Ma's group." 
+ ], + "score": { + "usability_domain_length": 257 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000639/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Function (UniProtKB) dataset contains biological function annotation for fruitfly [taxid:7227] protein accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 256 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000035/1.0.25", + "usability_domain": [ + "Table of gene preferential expression profiles in cancer cells. The csv file human_cancer_scRNA_preferential_expression contains expression specificity scores, expression specificity annotations, and associated biological information for cancer cell types." + ], + "score": { + "usability_domain_length": 256 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000639/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Function (UniProtKB) dataset contains biological function annotation for fruitfly [taxid:7227] protein accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " + ], + "score": { + "usability_domain_length": 256 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000702/v-2.0.2", + "usability_domain": [ + "The Fruitfly NCBI Protein Linkouts contains fruitfly [taxid:7227] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID." + ], + "score": { + "usability_domain_length": 253 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000074/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for head and Neck squamous cell carcinoma patients in the TCGA-HNSC study. 
The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 253 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000702/v-2.1.1", + "usability_domain": [ + "The Fruitfly NCBI Protein Linkouts contains fruitfly [taxid:7227] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID." + ], + "score": { + "usability_domain_length": 253 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000683/v-2.0.2", + "usability_domain": [ + "This dataset contains fruitfly [taxid:7227] phosphorylation sites from iPTMNet database. The protein and sites are mapped to UniProtKB canonical accessions and fasta sequence. If you use this dataset please provide proper attribution to iPTMnet-GlyGen." + ], + "score": { + "usability_domain_length": 252 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000068/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for uterine corpus endometrial carcinoma patients in the TCGA-UCEC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 252 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000683/v-2.1.1", + "usability_domain": [ + "This dataset contains fruitfly [taxid:7227] phosphorylation sites from iPTMNet database. The protein and sites are mapped to UniProtKB canonical accessions and fasta sequence. If you use this dataset please provide proper attribution to iPTMnet-GlyGen." 
+ ], + "score": { + "usability_domain_length": 252 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000515/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file sarscov1_proteoform_glycosylation_sites_literature.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 251 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000515/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file sarscov1_proteoform_glycosylation_sites_literature.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 251 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000717/v-2.1.1", + "usability_domain": [ + "The O-Glucosylation Glycosylation Citations dataset contains details of the publication present in the dataset Human O-Glucosylation Glycosylation Sites https://data.glygen.org/GLY_000716, contributed by Daniel Williamson from Bob Haltiwanger's group." + ], + "score": { + "usability_domain_length": 251 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000712/v-2.1.1", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000708, contributed by Jungfeng Ma's group." 
+ ], + "score": { + "usability_domain_length": 251 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000713/v-2.1.1", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000709, contributed by Jungfeng Ma's group." + ], + "score": { + "usability_domain_length": 251 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000514/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file human_proteoform_glycosylation_sites_tyr_o_linked.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 250 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000514/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file human_proteoform_glycosylation_sites_tyr_o_linked.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 250 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000698/v-2.0.2", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Rhea contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Rhea" + ], + "score": { + "usability_domain_length": 249 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000073/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for kidney renal clear cell carcinoma patients in the TCGA-KIRC study. 
The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 249 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000698/v-2.1.1", + "usability_domain": [ + "The Fruitfly UniProtKB Xref Rhea contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", + "Rhea" + ], + "score": { + "usability_domain_length": 249 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000714/v-2.1.1", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000710, contributed by Jungfeng Ma's group." + ], + "score": { + "usability_domain_length": 247 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000015/1.0.25", + "usability_domain": [ + "Master results table of cancer cell-type expression specificity data. The csv file human_cancer_scRNA_expression contains expression specificity scores, expression specificity annotations, and associated disease information for cancer cell types." + ], + "score": { + "usability_domain_length": 246 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000072/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for liver hepatocellular carcinoma patients in the TCGA-LIHC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 246 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000491/v-2.0.2", + "usability_domain": [ + "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). 
The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000636/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Canonical (UniProtKB) Sequences dataset contains fruitfly [taxid:7227] canonical protein fasta sequences from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000339/v-2.0.2", + "usability_domain": [ + "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000339/v-2.1.1", + "usability_domain": [ + "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000491/v-2.1.1", + "usability_domain": [ + "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). 
Source database: https://glytoucan.org/" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000636/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Canonical (UniProtKB) Sequences dataset contains fruitfly [taxid:7227] canonical protein fasta sequences from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000600/v-2.1.1", + "usability_domain": [ + "-", + "The dataset provides evidence for a library of complex multiantennary Asn-linked N-glycans generated by chemo-enzymatic synthesis (PMID:30745240). The data was contributed by the Richard Cummings Laboratory (NCFG BIDMC Harvard Medical School)." + ], + "score": { + "usability_domain_length": 245 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000642/v-2.0.2", + "usability_domain": [ + "The Fruitfly Gene Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] gene Names/names (primary gene name and gene synonyms) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 244 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000076/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for bladder urothelial carcinoma patients in the TCGA-BLCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 244 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000069/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for lung squamous cell carcinoma patients in the TCGA-LUSC study. 
The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 244 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000075/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for kidney chromophobe carcinoma patients in the TCGA-KICH study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 244 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000642/v-2.1.1", + "usability_domain": [ + "The Fruitfly Gene Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] gene Names/names (primary gene name and gene synonyms) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" + ], + "score": { + "usability_domain_length": 244 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000326/v-2.0.2", + "usability_domain": [ + "The dataset provides a set of glycans which are fully determined. Each glycan is associated with a GlyTouCan Accession. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." + ], + "score": { + "usability_domain_length": 243 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000326/v-2.1.1", + "usability_domain": [ + "The dataset provides a set of glycans which are fully determined. Each glycan is associated with a GlyTouCan Accession. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." 
+ ], + "score": { + "usability_domain_length": 243 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000676/v-2.0.2", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (MCW) httpss://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 242 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000676/v-2.1.1", + "usability_domain": [ + "The Fruitfly O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (MCW) httpss://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 242 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000580/v-2.0.2", + "usability_domain": [ + "The dataset provides the glycan sequences in GlycoWorkBench format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). " + ], + "score": { + "usability_domain_length": 241 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000580/v-2.1.1", + "usability_domain": [ + "The dataset provides the glycan sequences in GlycoWorkBench format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281)." + ], + "score": { + "usability_domain_length": 239 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000066/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for esophageal carcinoma patients in the TCGA-ESCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." 
+ ], + "score": { + "usability_domain_length": 236 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000518/v-2.0.2", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (MCW) https://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 235 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000071/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for lung adenocarcinoma patients in the TCGA-LUAD study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 235 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000518/v-2.1.1", + "usability_domain": [ + "The Human O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (MCW) https://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 235 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000070/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for thyroid carcinoma patients in the TCGA-THCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 233 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000573/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." 
+ ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000574/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000516/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000513/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000142. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000512/v-2.0.2", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000335. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000512/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000335. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." 
+ ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000513/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000142. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000516/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000573/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000574/v-2.1.1", + "usability_domain": [ + "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." + ], + "score": { + "usability_domain_length": 232 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000067/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for prostate cancer patients in the TCGA-PRAD study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." 
+ ], + "score": { + "usability_domain_length": 231 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000044/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] mRNAs and miRNAs with literature evidence of expression in cancer - This file contains human [taxid:9606] mRNAs and miRNAs with reported expression in cancer mined from abstracts in PubMed using DEXTER." + ], + "score": { + "usability_domain_length": 229 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000065/1.0.36", + "usability_domain": [ + "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for breast cancer patients in the TCGA-BRCA study. Te dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." + ], + "score": { + "usability_domain_length": 228 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000089/1.0.36", + "usability_domain": [ + "A0393 is a panel of identified gene predictive biomarkers (mutations) in colorectal cancer (DOID:9256). The genes are KRAS (UPKB:P01116), NRAS (GTPase NRas gene (NRAS). This panel is curated from the FDA Approved biomarkers." + ], + "score": { + "usability_domain_length": 224 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000014/1.2", + "usability_domain": [ + "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", + "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." + ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000014/1.3", + "usability_domain": [ + "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", + "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." 
+ ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000014/1.4", + "usability_domain": [ + "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", + "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." + ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000014/1.5", + "usability_domain": [ + "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", + "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." + ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000651/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000651/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 222 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000635/v-2.0.2", + "usability_domain": [ + "The Fruitfly Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for fruitfly [taxid:7227] proteins. 
If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 221 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000635/v-2.1.1", + "usability_domain": [ + "The Fruitfly Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" + ], + "score": { + "usability_domain_length": 221 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000336/v-2.0.2", + "usability_domain": [ + "The Mouse (TaxID:10090) Xref GlycoProtDB contains x-refs to IDs present in the GlycoProtDB database (https://acgg.asia/gpdb2/index). If you use this dataset please provide proper attribution to GlycoProtDB and GlyGen. " + ], + "score": { + "usability_domain_length": 219 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000336/v-2.1.1", + "usability_domain": [ + "The Mouse (TaxID:10090) Xref GlycoProtDB contains x-refs to IDs present in the GlycoProtDB database (https://acgg.asia/gpdb2/index). If you use this dataset please provide proper attribution to GlycoProtDB and GlyGen. " + ], + "score": { + "usability_domain_length": 219 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000055/1.0.36", + "usability_domain": [ + "This file contains a list of human [taxid:9606] genes with cancer mutation data from dbSNP and EBI. This file contains custom human genes with associated cancer mutation data derived from germline and somatic mutation." + ], + "score": { + "usability_domain_length": 218 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000017/1.0.25", + "usability_domain": [ + "This file contains a list of human [taxid:9606] genes with cancer mutation data from dbSNP and EBI. This file contains custom human genes with associated cancer mutation data derived from germline and somatic mutation." 
+ ], + "score": { + "usability_domain_length": 218 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000047/1.1", + "usability_domain": [ + "The Argos Biosample Metadata master data file sheet is a collection of metrics for NCBI Biosamples associated with the database for Reference Grade microbial Sequences (FDA ARGOS). This sheet was manually populated." + ], + "score": { + "usability_domain_length": 215 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000036/1.0.25", + "usability_domain": [ + "This table may be used as a list of candidate marker genes in cancer cells. This table features the top ten differentially expressed genes in the cells of a cancer type, determined with a Wilcoxon rank-sum test." + ], + "score": { + "usability_domain_length": 211 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000552/v-2.0.2", + "usability_domain": [ + "The dataset provides cross-references to GlyTouCan for the for GlyGen Mapper tool. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" + ], + "score": { + "usability_domain_length": 205 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000552/v-2.1.1", + "usability_domain": [ + "The dataset provides cross-references to GlyTouCan for the for GlyGen Mapper tool. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" + ], + "score": { + "usability_domain_length": 205 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000718/v-2.1.1", + "usability_domain": [ + "The Human Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Human Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000332." 
+ ], + "score": { + "usability_domain_length": 205 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000719/v-2.1.1", + "usability_domain": [ + "The Mouse Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Mouse Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000333." + ], + "score": { + "usability_domain_length": 205 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000720/v-2.1.1", + "usability_domain": [ + "The Rat Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Rat Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000334." + ], + "score": { + "usability_domain_length": 201 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000696/v-2.0.2", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 198 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000696/v-2.1.1", + "usability_domain": [ + "The Mouse O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 198 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000040/1.0.36", + "usability_domain": [ + "The neoepitope dataset displays data on neoepitope peptides that arise from tumor-specific mutations. It catalogs experimental data on epitopes studied in humans in the context of various cancers." 
+ ], + "score": { + "usability_domain_length": 196 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000034/1.0.25", + "usability_domain": [ + "The neoepitope dataset displays data on neoepitope peptides that arise from tumor-specific mutations. It catalogs experimental data on epitopes studied in humans in the context of various cancers." + ], + "score": { + "usability_domain_length": 196 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000700/v-2.0.2", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information for fruitfly [taxid:7227] proteins. " + ], + "score": { + "usability_domain_length": 195 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000700/v-2.1.1", + "usability_domain": [ + "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information for fruitfly [taxid:7227] proteins. " + ], + "score": { + "usability_domain_length": 195 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000697/v-2.0.2", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 194 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000090/1.0.36", + "usability_domain": [ + "A0390 is a panel of identified gene risk biomarkers (mutations) in ovarian cancer. The genes are BRCA1 (UPKB:P38398), BRCA2 (UPKB:P51587). This panel is curated from the FDA Approved biomarkers." 
+ ], + "score": { + "usability_domain_length": 194 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000697/v-2.1.1", + "usability_domain": [ + "The Rat O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." + ], + "score": { + "usability_domain_length": 194 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000539/v-2.0.2", + "usability_domain": [ + "The datasets provides a list of GlyTouCan accessions which are supported by GNOme ontology (https://gnome.glyomics.org/) (https://gnome.glyomics.org/restrictions/GlyGen.StructureBrowser.html). " + ], + "score": { + "usability_domain_length": 193 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000539/v-2.1.1", + "usability_domain": [ + "The datasets provides a list of GlyTouCan accessions which are supported by GNOme ontology (https://gnome.glyomics.org/) (https://gnome.glyomics.org/restrictions/GlyGen.StructureBrowser.html). 
" + ], + "score": { + "usability_domain_length": 193 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000482/v-2.0.2", + "usability_domain": [ + "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in dataset https://data.glygen.org/GLYDS000479" + ], + "score": { + "usability_domain_length": 187 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000482/v-2.1.1", + "usability_domain": [ + "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in dataset https://data.glygen.org/GLYDS000479" + ], + "score": { + "usability_domain_length": 187 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_031929/1.0", + "usability_domain": [ + "Generation of paired-end synthetic reads from 4 virus sequences: Human Adenovirus (NC_001405.1), West Nile virus (NC_001563), Hepatitis B virus (NC_003977), and Lassa virus (NC_004297)." + ], + "score": { + "usability_domain_length": 186 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000083/1.0.36", + "usability_domain": [ + "Human [taxid:9606] miRNA file complete dataset corresponding to the current BioXpress version. All differentially expressed miRNAs in all cancers (from matched tumor-normal samples)." 
+ ], + "score": { + "usability_domain_length": 182 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000054/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] mutations in cancer from TCGA and ICGC mapped to RefSeq nucleotide coordinates, UniProtKB amino acid coordinates, and Disease Ontology disease terms" + ], + "score": { + "usability_domain_length": 175 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000018/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] mutations in cancer from TCGA and ICGC mapped to RefSeq nucleotide coordinates, UniProtKB amino acid coordinates, and Disease Ontology disease terms" + ], + "score": { + "usability_domain_length": 175 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000309/v-2.0.2", + "usability_domain": [ + "The dataset lists the synthesized glycans which are identified by the the GlyTouCan accessions. Source:- The Boons Group. https://www.ccrc.uga.edu/~gjboons/boons/Home.htm()" + ], + "score": { + "usability_domain_length": 172 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000060/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] functional elements by cancer and race. This file contains functional elements for five different cancers and each cancer is categorized by race." + ], + "score": { + "usability_domain_length": 172 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000309/v-2.1.1", + "usability_domain": [ + "The dataset lists the synthesized glycans which are identified by the the GlyTouCan accessions. Source:- The Boons Group. https://www.ccrc.uga.edu/~gjboons/boons/Home.htm()" + ], + "score": { + "usability_domain_length": 172 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000732/v-2.1.1", + "usability_domain": [ + "The dataset contains publication information of the PMIDs present in the Human Glycosylation Sites UniCarbKB Glycomics Study dataset (https://data.glygen.org/GLY_000611)." 
+ ], + "score": { + "usability_domain_length": 170 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000727/v-2.1.1", + "usability_domain": [ + "Human Protein Biomarkers Citations dataset contains details of the publication present in the dataset Human Protein Cancer Biomarkers https://data.glygen.org/GLY_000625" + ], + "score": { + "usability_domain_length": 168 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000734/v-2.1.1", + "usability_domain": [ + "The dataset contains publication information of the PMIDs present in the Rat Glycosylation Sites UniCarbKB Glycomics Study dataset (https://data.glygen.org/GLY_000733)." + ], + "score": { + "usability_domain_length": 168 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000053/1.0.36", + "usability_domain": [ + "Clinical data from normal samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." + ], + "score": { + "usability_domain_length": 167 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000038/1.0.25", + "usability_domain": [ + "Clinical data from normal samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." + ], + "score": { + "usability_domain_length": 167 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000037/1.0.25", + "usability_domain": [ + "Clinical data from cancer samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." + ], + "score": { + "usability_domain_length": 167 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000041/1.0.36", + "usability_domain": [ + "Clinical data from cancer samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." 
+ ], + "score": { + "usability_domain_length": 167 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000739/v-2.1.1", + "usability_domain": [ + "The Human Germline Mutation Citations dataset contains details of the publications present in the Human Germline Mutation dataset (https://data.glygen.org/GLY_000459)." + ], + "score": { + "usability_domain_length": 167 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000722/v-2.1.1", + "usability_domain": [ + "The Human Literature Mutations Citations contains details of the publication present in the dataset Human Literature Mutations https://data.glygen.org/GLY_000463." + ], + "score": { + "usability_domain_length": 162 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000080/1.0.36", + "usability_domain": [ + "Differential Expression Glycosyltransferases in Human [taxid:9606] Cancer. These enzymes are mapped with disease ontology [DOID] retrieved from TCGA cancers. " + ], + "score": { + "usability_domain_length": 158 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000738/v-2.1.1", + "usability_domain": [ + "The Glycan Biomarkers Citations dataset contains details of the publications present in the Glycan Biomarkers dataset (https://data.glygen.org/GLY_000737)." + ], + "score": { + "usability_domain_length": 155 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000721/v-2.1.1", + "usability_domain": [ + "The Human Somatic Mutation Citations contains details of the publication present in the dataset Human Somatic Mutation https://data.glygen.org/GLY_000462." + ], + "score": { + "usability_domain_length": 154 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000025/1.0.25", + "usability_domain": [ + "List of mouse [taxid:10090] genes with normal RNA-Seq and Affymetrix expression data from Bgee for anatomical entities associated with cancer." 
+ ], + "score": { + "usability_domain_length": 142 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000555/v-2.0.2", + "usability_domain": [ + "The file includes glycan annotation (GlyTouCan ac and ChEBI Id) for the protein + site included in UniProtKB(https://uniprot.org/) database. " + ], + "score": { + "usability_domain_length": 141 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000023/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] genes with normal RNA-Seq and Affymetrix expression data from Bgee for anatomical entities associated with cancer." + ], + "score": { + "usability_domain_length": 141 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000555/v-2.1.1", + "usability_domain": [ + "The file includes glycan annotation (GlyTouCan ac and ChEBI Id) for the protein + site included in UniProtKB(https://uniprot.org/) database. " + ], + "score": { + "usability_domain_length": 141 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000694/v-2.0.2", + "usability_domain": [ + "Mouse Xref The O-GlcNAc Database contains mouse (taxid:10090) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 140 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000694/v-2.1.1", + "usability_domain": [ + "Mouse Xref The O-GlcNAc Database contains mouse (taxid:10090) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 140 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000050/1.0", + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
+ ], + "score": { + "usability_domain_length": 139 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000553/v-2.0.2", + "usability_domain": [ + "Human Xref The O-GlcNAc Database contains human (taxid:9606) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 139 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000553/v-2.1.1", + "usability_domain": [ + "Human Xref The O-GlcNAc Database contains human (taxid:9606) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 139 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000731/v-2.1.1", + "usability_domain": [ + "SARS-CoV2 UniProtKB xref GlyConnect dataset contains sarscov2 (taxid: 2697049) UniProtKB canonical accessions mapped to the GlyConnect IDs." + ], + "score": { + "usability_domain_length": 139 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000695/v-2.0.2", + "usability_domain": [ + "Rat Xref The O-GlcNAc Database contains rat (taxid:10116) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 136 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000695/v-2.1.1", + "usability_domain": [ + "Rat Xref The O-GlcNAc Database contains rat (taxid:10116) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." + ], + "score": { + "usability_domain_length": 136 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000529/v-2.0.2", + "usability_domain": [ + "The file contain disease data associated to protein/gene downloaded from GlyCosmos database. [https://glycosmos.org/homes/download]. 
" + ], + "score": { + "usability_domain_length": 133 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000529/v-2.1.1", + "usability_domain": [ + "The file contain disease data associated to protein/gene downloaded from GlyCosmos database. [https://glycosmos.org/homes/download]. " + ], + "score": { + "usability_domain_length": 133 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000724/v-2.1.1", + "usability_domain": [ + "Mouse Xref O-GlcNAcAtlas dataset contains mouse (taxid: 10090) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." + ], + "score": { + "usability_domain_length": 133 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000723/v-2.1.1", + "usability_domain": [ + "Human Xref O-GlcNAcAtlas dataset contains human (taxid: 9606) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." + ], + "score": { + "usability_domain_length": 132 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000725/v-2.1.1", + "usability_domain": [ + "Rat Xref O-GlcNAcAtlas dataset contains rat (taxid: 10116) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." + ], + "score": { + "usability_domain_length": 129 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000729/v-2.1.1", + "usability_domain": [ + "Mouse UniProtKB xref GlyConnect dataset contains mouse(taxid: 10090) UniProtKB canonical accessions mapped to the GlyConnect IDs." + ], + "score": { + "usability_domain_length": 129 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000728/v-2.1.1", + "usability_domain": [ + "Human UniProtKB xref GlyConnect dataset contains human(taxid: 9606) UniProtKB canonical accessions mapped to the GlyConnect IDs." 
+ ], + "score": { + "usability_domain_length": 128 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000538/v-2.0.2", + "usability_domain": [ + "The dataset provides citations for the associated glycan (GlyTouCan Accession) from dataset http://data.glygen.org/GLY_000528. " + ], + "score": { + "usability_domain_length": 127 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000587/v-2.0.2", + "usability_domain": [ + "The dataset provides all glycans (identified by GlyTouCan accession) from GlyTouCan. Source database: https://glytoucan.org/. " + ], + "score": { + "usability_domain_length": 126 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000587/v-2.1.1", + "usability_domain": [ + "The dataset provides all glycans (identified by GlyTouCan accession) from GlyTouCan. Source database: https://glytoucan.org/. " + ], + "score": { + "usability_domain_length": 126 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000730/v-2.1.1", + "usability_domain": [ + "Rat UniProtKB xref GlyConnect dataset contains rat(taxid: 10116) UniProtKB canonical accessions mapped to the GlyConnect IDs." + ], + "score": { + "usability_domain_length": 125 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000045/1.0.36", + "usability_domain": [ + "This dataset contains public cancer biomarkers retrieved from the Early Detection Research Network (EDRN)." + ], + "score": { + "usability_domain_length": 106 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000019/1.0.25", + "usability_domain": [ + "This dataset contains public cancer biomarkers retrieved from the Early Detection Research Network (EDRN)." + ], + "score": { + "usability_domain_length": 106 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000031/1.0.25", + "usability_domain": [ + "List of one to one orthologous Ensembl gene identifiers and HGNC/MGI gene symbols for humans and mice." 
+ ], + "score": { + "usability_domain_length": 102 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000004/0.0", + "usability_domain": [ + "A workflow to assemble raw COVID19 virus Illumina sequencing reads, using the SPADES assembler." + ], + "score": { + "usability_domain_length": 95 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000595/v-2.0.2", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000594/v-2.0.2", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000596/v-2.0.2", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000594/v-2.1.1", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000595/v-2.1.1", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000596/v-2.1.1", + "usability_domain": [ + "", + "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. 
" + ], + "score": { + "usability_domain_length": 94 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000240/1.0", + "usability_domain": [ + "Prediction of research topic of publication based on topic of cited papers on citation graph" + ], + "score": { + "usability_domain_length": 92 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000011/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] mRNAs with literature evidence of expression in lung cancer" + ], + "score": { + "usability_domain_length": 86 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000052/1.0.36", + "usability_domain": [ + "List of human [taxid:9606] genes with literature evidence of mutation in all cancers." + ], + "score": { + "usability_domain_length": 85 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000007/1.0", + "usability_domain": [ + "", + "Workflow metagenomic annotation for sequencing reads and phylogenetic tree creation." + ], + "score": { + "usability_domain_length": 84 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000020/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] genes with literature evidence of mutation in all cancers" + ], + "score": { + "usability_domain_length": 84 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000014/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] genes with literature evidence of mutation in lung cancer" + ], + "score": { + "usability_domain_length": 84 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000010/1.0.25", + "usability_domain": [ + "List of human [taxid:9606] miRNAs with literature evidence of expression in cancer" + ], + "score": { + "usability_domain_length": 82 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000006/1.0", + "usability_domain": [ + "Processing NGS exome sequencing reads. 
alignment, gene assembly, variant calling" + ], + "score": { + "usability_domain_length": 80 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000527/v-2.0.2", + "usability_domain": [ + "The dataset includes GlyTouCan accessions included in Sandbox application." + ], + "score": { + "usability_domain_length": 74 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000527/v-2.1.1", + "usability_domain": [ + "The dataset includes GlyTouCan accessions included in Sandbox application." + ], + "score": { + "usability_domain_length": 74 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000410/1.0", + "usability_domain": [ + "Test BCO for sample of OncoMX data for the biomarker-partnership." + ], + "score": { + "usability_domain_length": 65 + } + }, + { + "object_id": "https://biocomputeobject.org/ARGOS_000004/1.4", + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences." + ], + "score": { + "usability_domain_length": 59 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000476/v-2.0.2", + "usability_domain": [ + "The dataset provides rdf fomat file for the glycan data. " + ], + "score": { + "usability_domain_length": 57 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000476/v-2.1.1", + "usability_domain": [ + "The dataset provides rdf fomat file for the glycan data. 
" + ], + "score": { + "usability_domain_length": 57 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000005/0.0", + "usability_domain": [ + "Comparison of COVID19 assembled genomes for variants" + ], + "score": { + "usability_domain_length": 52 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000082/1.0.36", + "usability_domain": [ + "Human Cancer Glycosyltranferases retrieved from TCGA" + ], + "score": { + "usability_domain_length": 52 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000008/1.0", + "usability_domain": [ + "Peak finder for CHIPseq read data" + ], + "score": { + "usability_domain_length": 33 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000016/3.0", + "usability_domain": [ + "This is the workflow annotation" + ], + "score": { + "usability_domain_length": 31 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000016/3.1", + "usability_domain": [ + "This is the workflow annotation" + ], + "score": { + "usability_domain_length": 31 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000600/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000601/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000606/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000603/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000604/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000605/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + 
"object_id": "https://biocomputeobject.org/GLY_000602/v-2.0.2", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000601/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000602/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000603/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000604/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000605/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000606/v-2.1.1", + "usability_domain": [ + "-" + ], + "score": { + "usability_domain_length": 1 + } + }, + { + "object_id": "https://biocomputeobject.org/GALXY_000001/1.0", + "usability_domain": [ + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000607/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000522/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000565/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000611/v-2.0.2", + "usability_domain": [ + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000608/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 
+ } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000509/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000592/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000549/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000550/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000562/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000619/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000546/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000575/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000548/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000617/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000551/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000519/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000563/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": 
"https://biocomputeobject.org/GLY_000618/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000547/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000561/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000507/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000564/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000610/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000508/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000593/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000540/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000609/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000572/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000577/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000545/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000679/v-2.0.2", + 
"usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000526/v-2.0.2", + "usability_domain": [ + "", + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000627/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000566/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000521/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000542/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000591/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000570/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000569/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000612/v-2.0.2", + "usability_domain": [ + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000588/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000520/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000567/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000543/v-2.0.2", + "usability_domain": [], + "score": { + 
"usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000571/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000590/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000589/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000568/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000677/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000678/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000699/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000544/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000560/v-2.0.2", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000316/3.0", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/OMX_000093/1.0.36", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000507/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000508/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": 
"https://biocomputeobject.org/GLY_000509/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000519/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000520/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000521/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000522/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000526/v-2.1.1", + "usability_domain": [ + "", + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000540/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000542/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000543/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000544/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000545/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000546/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000547/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": 
"https://biocomputeobject.org/GLY_000548/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000549/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000550/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000551/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000560/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000561/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000562/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000563/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000564/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000565/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000566/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000567/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000568/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000569/v-2.1.1", + 
"usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000570/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000571/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000572/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000575/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000577/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000588/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000589/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000590/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000591/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000592/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000593/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000607/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000608/v-2.1.1", + "usability_domain": [], + "score": { + 
"usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000610/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000611/v-2.1.1", + "usability_domain": [ + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000612/v-2.1.1", + "usability_domain": [ + "" + ], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000617/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000618/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000619/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000627/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000677/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000678/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000679/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000699/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000609/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/GLY_000733/v-2.1.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + 
"object_id": "https://biocomputeobject.org/GALXY_000027/0.0", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000476/3.0", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000478/3.0", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000480/3.1", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + }, + { + "object_id": "https://biocomputeobject.org/BCO_000482/3.0", + "usability_domain": [], + "score": { + "usability_domain_length": 0 + } + } +] \ No newline at end of file diff --git a/docs/refactor.md b/docs/refactor.md new file mode 100644 index 00000000..3560859f --- /dev/null +++ b/docs/refactor.md @@ -0,0 +1,68 @@ +# Planned Changes for 24.04 release + +## Proposed changes + +### Provide `One Click` examples that work for Swagger +- GlyGen and ARGOS do this already. + +### simplify API models and processing + - previous model was based on multiple DB requests per object and each request could have buld sumissions + - We still want bulk submissions but validations and permissions should be checked by classes and serializers before pining DB +#### Examples: +- POST_api_objects_draft_create + +### Handeling what will become `Legacy` requests +1. maintain the old code, and not publicize it +2. develope converter functions to process + + +### Refactor the groups user permissions +- previous model was based on additional objects for Groups and Users, This required the use of `signals` and meant that there were many additional objects in the DB each time a new user was created. This also led to dependancy issues which prohibits deleting anything. 
+- propose to eliminate the extra models + +### Refactor the Prefix permission system +- Prefix required its own two groups and the creation of 5 permissions for each prefix. Look up for authentication was time consuming and taxing on DB. Users also had no idea how to use the system, just what to do to make it work. +- Propose to add `authorized groups` to the prefix model. If it is empty then anyone can use it. If populated then only those in list can use it + +### Refactor the BCO permission system +- same situation as prefix + +## Permissions + +- BCO has `owner`, `auth_group` and `auth_user` +- Prefix has `owner`, and `auth_group` + +## Items to look at later +- `authentication.apis.RegisterUserNoVerificationAPI` has no swagger or tests +- fix email and secrets +- install a `test_template` for Swagger responses +- provide example values that are usable for testing APIs. +- certifying key for prefix as a JWT? +- owner = models.ForeignKey( + User, + on_delete=models.CASCADE, +- need tests for token +- unwanted swagger endpoints +- need tests for token +- prefix api documentation and portal docs for prefix +- Remove ETag from Portal + +Prefix Perms: + add -> create new DRAFT + edit -> Change existing Draft + delete -> Delete Draft + publish -> Publish Draft + view -> View/download + ONLY if private + + If prefix is public anyone can view, but only auth users can modify. + +## Things to look for when reviewing code: +### For Swagger: + - Each Swagger endpoint has a "one click" working example. + ### For functions: + - variable names are consistent and make sense + - all functions have documentation. 
This shoudl include: + - descriptions + - explicit parameters/inputs and outputs/returns + - hoverover should display function documentation diff --git a/manage.py b/manage.py index d7818f7b..d28672ea 100755 --- a/manage.py +++ b/manage.py @@ -6,7 +6,7 @@ def main(): """Run administrative tasks.""" - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bcodb.settings") + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings") try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/api/scripts/__init__.py b/prefix/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from api/scripts/__init__.py rename to prefix/__init__.py diff --git a/prefix/admin.py b/prefix/admin.py new file mode 100644 index 00000000..14176112 --- /dev/null +++ b/prefix/admin.py @@ -0,0 +1,7 @@ +"""Prefix Admin Pannel +""" + +from django.contrib import admin +from prefix.models import Prefix + +admin.site.register(Prefix) \ No newline at end of file diff --git a/prefix/apis.py b/prefix/apis.py new file mode 100644 index 00000000..3e0cbcce --- /dev/null +++ b/prefix/apis.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +#prefix/api.py + +from drf_yasg import openapi +from drf_yasg.utils import swagger_auto_schema +from rest_framework import status +from rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response +from rest_framework.views import APIView +from config.services import legacy_api_converter, response_constructor +from prefix.services import PrefixSerializer, delete_prefix +from prefix.selectors import get_prefix_object, get_user_prefixes + +user_permissions = {"tester": ["view_TEST", "publish_TEST"]} + +NOPUB_data ={ + "pk": "NOPUB", + "created": "2024-03-26T22:22:22Z", + "description": "Test non-public prefix.", + "owner": "bco_api_user", + "public": False, + "counter": 0, + "user_permissions": { + "tester": [ + "view_NOPUB", + "add_NOPUB", + "change_NOPUB", + "delete_NOPUB" + 
], + "bco_api_user": [ + "view_NOPUB", + "add_NOPUB", + "change_NOPUB", + "delete_NOPUB", + "publish_NOPUB" + ] + } +} + +USER_PERMISSIONS_SCHEMA = openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["user", "permissions"], + example=user_permissions, + properties={ + "user": openapi.Schema( + type=openapi.TYPE_STRING, + description="User for permissions to be modified", + ), + "permissions": openapi.Schema( + type=openapi.TYPE_ARRAY, + description="List of permissiosn to apply", + items=openapi.Schema( + type=openapi.TYPE_STRING + ) + ) + + } +) + +PREFIX_MODIFY_SCHEMA = openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Prefix Modify Schema", + items=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["prefix"], + properties={ + "prefix": openapi.Schema( + type=openapi.TYPE_STRING, + description="The Prefix to be modified.", + example="TEST" + ), + "description": openapi.Schema( + type=openapi.TYPE_STRING, + description="A description of what this prefix should represent. For example, the prefix 'GLY' would be related to BCOs which were derived from GlyGen workflows.", + example="Test prefix description." + ), + "user_permissions": USER_PERMISSIONS_SCHEMA, + "public": openapi.Schema( + type=openapi.TYPE_BOOLEAN, + description="Flag to set permissions.", + example=True + ) + }, + ) +) + +class PrefixesCreateApi(APIView): + """ + Create a Prefix [Bulk Enabled] + + Create a prefix to be used to classify BCOs and to determine permissions + for objects created under that prefix. 
+ + """ + + permission_classes = [IsAuthenticated,] + + @swagger_auto_schema( + operation_id="api_prefixes_create", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Prefix Schema", + items=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["prefix"], + properties={ + "prefix": openapi.Schema( + type=openapi.TYPE_STRING, + description="Any prefix which satsifies the naming standard.", + example="test" + ), + "description": openapi.Schema( + type=openapi.TYPE_STRING, + description="A description of what this prefix should represent. For example, the prefix 'GLY' would be related to BCOs which were derived from GlyGen workflows.", + example="Test prefix description." + ), + "certifying_key": openapi.Schema( + type=openapi.TYPE_STRING, + description="Hash of server and date-time of creation.", + example="12345678910" + ), + "public": openapi.Schema( + type=openapi.TYPE_BOOLEAN, + description="Flag to set permissions.", + example=True + ) + }, + ) + ), + responses={ + 201: "The prefix was successfully created.", + 400: "Bad request for one of two reasons: \n1) the prefix does not" + "follow the naming standard, or \n2) owner_user and/or" + "owner_group do not exist.", + 401: "Unauthorized. Authentication credentials were not provided.", + 403: "Forbidden. 
User doesnot have permission to perform this action", + 409: "The prefix the requestor is attempting to create already exists.", + }, + tags=["Prefix Management"], + ) + + def post(self, request) -> Response: + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + if 'POST_api_prefixes_create' in request.data: + data = legacy_api_converter(request.data) + + if data[0]['prefix']=='test' and data[0]['public'] is True: + return Response( + status=status.HTTP_201_CREATED, + data=[response_constructor( + identifier="TEST", + status = "SUCCESS", + code= 201, + message= f"Testing: Prefix TEST created" + )] + ) + + for index, object in enumerate(data): + response_id = object.get("prefix", index).upper() + prefix_data = PrefixSerializer(data=object, context={'request': request}) + + if prefix_data.is_valid(): + prefix_data.create(prefix_data.validated_data) + response_data.append(response_constructor( + identifier=response_id, + status = "SUCCESS", + code= 201, + message= f"Prefix {response_id} created", + )) + accepted_requests = True + + else: + response_data.append(response_constructor( + identifier=response_id, + status = "REJECTED", + code= 400, + message= f"Prefix {response_id} rejected", + data=prefix_data.errors + )) + rejected_requests = True + + if accepted_requests is False and rejected_requests == True: + return Response( + status=status.HTTP_400_BAD_REQUEST, + data=response_data + ) + + if accepted_requests is True and rejected_requests is True: + return Response( + status=status.HTTP_207_MULTI_STATUS, + data=response_data + ) + + if accepted_requests is True and rejected_requests is False: + return Response( + status=status.HTTP_201_CREATED, + data=response_data + ) + + return Response(status=status.HTTP_201_CREATED, data=response_data) + +class PrefixesDeleteApi(APIView): + """Delete a Prefix [Bulk Enabled] + + The requestor *must* be the prefix owner to delete a prefix. 
+ + __Any object created under this prefix will have its permissions + "locked out." This means that any other view which relies on object-level + permissions, such as /api/objects/drafts/read/, will not allow any + requestor access to particular objects.__ + """ + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + operation_id="api_prefixes_delete", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Prefix Deletion Schema", + description="Provide a list of prefixes to delete.", + items=openapi.Schema( + type=openapi.TYPE_STRING, + example="TEST" + ) + ), + responses={ + 200: "Deleting a prefix was successful.", + 401: "Unauthorized. Authentication credentials were not provided.", + 403: "Forbidden. User does not have permission to perform this action", + 404: "The prefix couldn't be found so therefore it could not be deleted.", + }, + tags=["Prefix Management"], + ) + + def post(self, request) -> Response: + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + + if "POST_api_prefixes_delete" in request.data: + data = legacy_api_converter(request.data) + + if data[0] == "TEST": + return Response( + status=status.HTTP_201_CREATED, + data=[response_constructor( + identifier="TEST", + status = "SUCCESS", + code= 200, + message= f"Testing: Prefix TEST deleted" + )] + ) + for index, object in enumerate(data): + response_id = object + response_status = delete_prefix(object, requester) + + if response_status is True: + response_data.append(response_constructor( + identifier=response_id, + status = "SUCCESS", + code= 200, + message= f"Prefix {response_id} deleted", + )) + accepted_requests = True + + else: + response_data.append(response_constructor( + identifier=response_id, + status = "REJECTED", + code= 400, + message= f"Prefix {response_id} NOT deleted", + data=response_status + )) + rejected_requests = True + + if accepted_requests is False: + return Response( + 
status=status.HTTP_400_BAD_REQUEST, + data=response_data + ) + + if accepted_requests is True and rejected_requests is True: + return Response( + status=status.HTTP_207_MULTI_STATUS, + data=response_data + ) + + if accepted_requests is True and rejected_requests is False: + return Response( + status=status.HTTP_200_OK, + data=response_data + ) + + return Response(status=status.HTTP_201_CREATED, data=response_data) + +class PrefixGetInfoApi(APIView): + """Get Prefix Info [Bulk Enabled] + + Returns a serialized Prefix instance. If the Prefix is not public and the + requestor has the apropirate permissions then a dictionary with users + and the associated Prefix permisssions will also be included. + """ + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + operation_id="api_prefixes_info", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Prefix Info Schema", + description="Retrieve a serialized Prefix instance.", + items=openapi.Schema( + type=openapi.TYPE_STRING, + example="NOPUB" + ) + ), + responses={ + 200: "Retrieving prefix info was successful.", + 401: "Unauthorized. Authentication credentials were not provided.", + 403: "Forbidden. 
User does not have permission to perform this action", + 404: "That prefix could not be found.", + }, + tags=["Prefix Management"], + ) + + def post(self, request) -> Response: + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + + if request.data[0] == "NOPUB": + return Response( + status=status.HTTP_200_OK, + data=NOPUB_data + ) + + for index, object in enumerate(data): + response_id = object + response_object = get_prefix_object(object) + + try: + if response_object['public'] is True or \ + requester.username in response_object['user_permissions']: + response_data.append(response_constructor( + identifier=response_id, + status = "SUCCESS", + code= 200, + message= f"Prefix {response_id} retrieved", + data=response_object + )) + accepted_requests = True + else: + response_data.append(response_constructor( + identifier=response_id, + status = "FORBIDDEN", + code= 403, + message= f"User, {requester}, does not have permissions for this Prefix, {response_id}.", + )) + rejected_requests = True + + except TypeError: + if response_object is None: + response_data.append(response_constructor( + identifier=response_id, + status = "NOT FOUND", + code= 404, + message= f"That Prefix, {response_id}, does not exist.", + )) + rejected_requests = True + else: + response_data.append(response_constructor( + identifier=response_id, + status = "BAD REQUEST", + code= 400, + message= f"There was a problem with that Prefix, {response_id}.", + )) + rejected_requests = True + + if accepted_requests is False: + return Response( + status=status.HTTP_400_BAD_REQUEST, + data=response_data + ) + + if accepted_requests is True and rejected_requests is True: + return Response( + status=status.HTTP_207_MULTI_STATUS, + data=response_data + ) + + if accepted_requests is True and rejected_requests is False: + return Response( + status=status.HTTP_200_OK, + data=response_data + ) + + return Response(status=status.HTTP_200_OK, 
data=response_data) + +class PrefixesModifyApi(APIView): + """Modify a Prefix [Bulk Enabled] + + Modify a prefix which already exists. + + The requestor *must* be the owner to modify a prefix. + """ + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + operation_id="api_prefixes_modify", + request_body=PREFIX_MODIFY_SCHEMA, + responses={ + 200: "The prefix was successfully modified.", + 400: "Bad request because owner_user and/or owner_group do not exist.", + 404: "The prefix provided could not be found.", + }, + tags=["Prefix Management"], + ) + def post(self, request) -> Response: + response_data = [] + requester = request.user + data = request.data + rejected_requests = False + accepted_requests = False + + if "POST_api_prefixes_modify" in request.data: + data = legacy_api_converter(request.data) + + for index, object in enumerate(data): + response_id = object.get("prefix", index).upper() + prefix = PrefixSerializer(data=object, context={'request': request}) + + if prefix.is_valid(): + if requester == prefix.validated_data['owner']: + prefix_update = prefix.update(prefix.validated_data) + response_data.append(response_constructor( + identifier=response_id, + status = "SUCCESS", + code= 200, + message= f"Prefix {response_id} updated", + data=prefix_update + )) + accepted_requests = True + + else: + response_data.append(response_constructor( + identifier=response_id, + status = "REJECTED", + code= 400, + message= f"Requester does not have permissions to modify {response_id}", + data=prefix.errors + )) + rejected_requests = True + + else: + response_data.append(response_constructor( + identifier=response_id, + status = "REJECTED", + code= 400, + message= f"Prefix {response_id} update rejected", + data=prefix.errors + )) + rejected_requests = True + + if accepted_requests is False and rejected_requests == True: + return Response( + status=status.HTTP_400_BAD_REQUEST, + data=response_data + ) + + if accepted_requests is True and rejected_requests is 
True: + return Response( + status=status.HTTP_207_MULTI_STATUS, + data=response_data + ) + + if accepted_requests is True and rejected_requests is False: + return Response( + status=status.HTTP_200_OK, + data=response_data + ) + + return Response(status=status.HTTP_201_CREATED, data=response_data) + +class PrefixesForUserApi(APIView): + """Get Prefixes for User + + Returns a list of prefixes the requestor is permitted to use. + """ + + permission_classes = [IsAuthenticated] + + @swagger_auto_schema( + manual_parameters=[ + openapi.Parameter( + "Authorization", + openapi.IN_HEADER, + description="Authorization Token", + type=openapi.TYPE_STRING, + default="Token 627626823549f787c3ec763ff687169206626149" + ) + ], + responses={ + 200: "Authorization is successful.", + 403: "Forbidden. Authentication credentials were not provided.", + 403: "Invalid token" + }, + tags=["Prefix Management"], + ) + + def post(self, request) -> Response: + return Response( + status=status.HTTP_200_OK, data=get_user_prefixes(request.user) + ) \ No newline at end of file diff --git a/api/scripts/method_specific/__init__.py b/prefix/apps.py old mode 100755 new mode 100644 similarity index 100% rename from api/scripts/method_specific/__init__.py rename to prefix/apps.py diff --git a/prefix/migrations/0001_initial.py b/prefix/migrations/0001_initial.py new file mode 100644 index 00000000..bda7b305 --- /dev/null +++ b/prefix/migrations/0001_initial.py @@ -0,0 +1,30 @@ +# Generated by Django 3.2.13 on 2024-04-02 20:08 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='Prefix', + fields=[ + ('prefix', models.CharField(max_length=5, primary_key=True, serialize=False)), + ('certifying_key', 
models.TextField(blank=True, null=True)), + ('created', models.DateTimeField(blank=True, default=django.utils.timezone.now, null=True)), + ('description', models.TextField(blank=True, null=True)), + ('counter', models.IntegerField(default=0, help_text='Counter for object_id asignment')), + ('public', models.BooleanField(default=True, help_text='Boolean field to indicate if there are restrictions on the use of this prefix')), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, to_field='username')), + ], + ), + ] diff --git a/api/scripts/utilities/__init__.py b/prefix/migrations/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from api/scripts/utilities/__init__.py rename to prefix/migrations/__init__.py diff --git a/prefix/models.py b/prefix/models.py new file mode 100644 index 00000000..c01cf361 --- /dev/null +++ b/prefix/models.py @@ -0,0 +1,34 @@ +from django.db import models +from django.contrib.auth.models import Group, User +from django.utils import timezone + +class Prefix(models.Model): + """ + """ + + prefix = models.CharField(primary_key=True, max_length=5) + certifying_key = models.TextField(blank=True, null=True) + created = models.DateTimeField( + default=timezone.now, + blank=True, + null=True + ) + description = models.TextField(blank=True, null=True) + owner = models.ForeignKey( + User, + on_delete=models.CASCADE, + to_field="username" + ) + counter = models.IntegerField( + default=0, + help_text="Counter for object_id asignment" + ) + public = models.BooleanField( + default=True, + help_text= "Boolean field to indicate if there are restrictions on "\ + + "the use of this prefix" + ) + + def __str__(self): + """String for representing the BCO model (in Admin site etc.).""" + return f"{self.prefix}" \ No newline at end of file diff --git a/prefix/selectors.py b/prefix/selectors.py new file mode 100644 index 00000000..82433f32 --- /dev/null +++ b/prefix/selectors.py @@ -0,0 
+1,156 @@ +# prefix/selectors.py + +"""Prefix Selectors + +Functions to query the database related to Prefixes +""" + +from django.core.serializers import serialize +from django.contrib.auth.models import User, Permission +from django.db import utils +from prefix.models import Prefix + +def user_can_publish_prefix(user: User, prefix_name:str) -> bool: + """User Can Publish + + Takes a prefix name and user. Returns a bool if the user can publish a BCO + with the prefix if it exists. If the prefix does not exist `None` is + returned. + """ + + try: + Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return None + codename = f"publish_{prefix_name}" + user_prefixes = get_user_prefixes(user) + return codename in user_prefixes + +def user_can_modify_prefix(user: User, prefix_name:str) -> bool: + """User Can Modify + + Takes a prefix name and user. Returns a bool if the user can modify a BCO + with the prefix if it exists. If the prefix does not exist `None` is + returned. + """ + + try: + Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return None + codename = f"change_{prefix_name}" + user_prefixes = get_user_prefixes(user) + + return codename in user_prefixes + +def user_can_draft_prefix(user: User, prefix_name:str) -> bool: + """User Can Draft + + Takes a prefix name and user. Returns a bool if the user can draft a BCO + with the prefix if it exists. If the prefix does not exist `None` + is returned. + """ + + try: + Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return None + codename = f"add_{prefix_name}" + user_prefixes = get_user_prefixes(user) + + return codename in user_prefixes + +def user_can_view_prefix(prefix_name:str, user: User) -> bool: + """User Can View + + Takes a prefix name and user. Returns a bool if the user can view a BCO + with the prefix if it exists. If the prefix does not exist `None` + is returned. 
+ """ + + try: + prefix_instance = Prefix.objects.get(prefix=prefix_name) + if prefix_instance.public is True: + return True + except Prefix.DoesNotExist: + return None + codename = f"view_{prefix_name}" + user_prefixes = get_user_prefixes(user) + + return codename in user_prefixes + +def get_user_prefixes(user: User) -> list: + """Get User Prefixes + Retrieves a User's Prefix Permissions + + Compiles a list of permissions associated with prefixes that a given user + has access to, including permissions for public prefixes. + + Note: + This function fetches permissions for public prefixes as well as those + directly assigned to the user via user permissions. + """ + + prefix_permissions = [] + + public_prefixes = Prefix.objects.filter(public=True) + for prefix_instance in public_prefixes: + for perm in [ "view", "add", "change", "delete", "publish"]: + codename = f"{perm}_{prefix_instance.prefix}" + prefix_permissions.append(codename) + for permission in user.user_permissions.all(): + prefix_permissions.append(permission.codename) + + return prefix_permissions + + +def get_prefix_object(prefix_name:str) -> dict: + """Get Prefix Object + + Returns a serialized Prefix instance. If the Prefix is not public then + a dictionary with users and the associated Prefix permisssions will also + be included. 
+ """ + + try: + prefix_instance = Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return None + + prefix_object = { #serialize('python', [prefix_instance])[0] + "pk": prefix_instance.pk, + "created": prefix_instance.created, + "description": prefix_instance.description, + "owner": prefix_instance.owner.username, + "public": prefix_instance.public, + "counter": prefix_instance.counter + } + if prefix_instance.public is False: + prefix_permissions = get_prefix_permissions(prefix_name) + prefix_object["user_permissions"] = prefix_permissions + return prefix_object + +def get_prefix_permissions(prefix_name:str) -> dict: + """Get Prefix Permissions + + Returns a dictionary with users and the associated Prefix permisssions. + """ + + users_permissions = {} + perms = [] + for perm in [ "view", "add", "change", "delete", "publish"]: + codename = f"{perm}_{prefix_name}" + try: + perms.append(Permission.objects.get(codename__exact=codename)) + except Permission.DoesNotExist: + pass + + for perm in perms: + users_with_perm = User.objects.filter(user_permissions=perm).prefetch_related('user_permissions') + for user in users_with_perm: + if user.username not in users_permissions: + users_permissions[user.username] = [] + if perm.codename not in users_permissions[user.username]: + users_permissions[user.username].append(perm.codename) + + return users_permissions diff --git a/prefix/services.py b/prefix/services.py new file mode 100644 index 00000000..9553663a --- /dev/null +++ b/prefix/services.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 +# prefix/services.py + +from django.conf import settings +from django.contrib.auth.models import User, Permission +from django.contrib.contenttypes.models import ContentType +from django.db import transaction, utils +from django.db.models import F +from django.utils import timezone +from prefix.models import Prefix +from prefix.selectors import get_prefix_object, get_prefix_permissions +from rest_framework import 
serializers + +"""Prefix Services + +Service functions for working with BCO Prefixes +""" + +class PrefixSerializer(serializers.Serializer): + """Serializer for Prefix instances. + + For validation and serialization of Prefix data. + + Fields: + - prefix (CharField): + A unique identifier for the Prefix, with a length constraint between 3 to 5 characters. It is automatically converted to upper case. + - description (CharField): + A textual description of the Prefix. + - user_permissions (JSONField): + A JSON structure detailing specific user permissions related to the Prefix. This field is optional. + - public (BooleanField): A flag indicating whether the Prefix is public or private. + This field is not required and defaults to `False` if not provided. + + Methods: + - validate(self, attrs): Validates the Prefix data. + - create(self, validated_data): Creates a new Prefix instance from the validated data. + - update(self, instance, validated_data): Updates an existing Prefix instance based + on the validated data. + + Note: The create and update operations are performed within a database transaction to + ensure data integrity. + """ + + prefix = serializers.CharField(min_length=3, max_length=5) + description = serializers.CharField() + user_permissions = serializers.JSONField(required=False, default={}) + public = serializers.BooleanField(required=False) + + def validate(self, attrs): + """Prefix Validator + + Validates incoming Prefix data against business rules and integrity constraints. + + It ensures the prefix is unique (for creation), exists (for updates), and assigns + the Prefix's owner based on the current request's user. It also converts the prefix + to upper case for consistency. + + Parameters: + - attrs (dict): The incoming Prefix data to validate. + + Returns: + - dict: The validated Prefix data, potentially modified (e.g., upper-cased prefix). + + Raises: + - serializers.ValidationError: If the prefix violates uniqueness or existence constraints. 
+ """ + + request = self.context.get('request') + attrs["owner"] = request.user + attrs['prefix'] = attrs['prefix'].upper() + prefix_name = attrs['prefix'] + + try: + attrs["prefix"] = Prefix.objects.get(prefix=prefix_name) + if "create" in request.path_info: + raise serializers.ValidationError({"prefix_name": f"That Prefix, {prefix_name}, already exists."}) + attrs["owner"] = attrs["prefix"].owner + except Prefix.DoesNotExist: + if "create" in request.path_info: + pass + else: + raise serializers.ValidationError({"prefix_name": f"That Prefix, {prefix_name}, was not found."}) + + return attrs + + @transaction.atomic + def create(self, validated_data): + """Create function for Prefix + + Creates a Prefix instance from the validated data. + + It handles the 'public' attribute specifically to manage permissions associated + with the Prefix. The 'user_permissions' field is ignored as it does not correspond + to a model field. + + Parameters: + - validated_data (dict): The data that has passed validation checks. + + Returns: + - Prefix: The newly created Prefix instance. + """ + + validated_data.pop('user_permissions') + + try: + public = validated_data['public'] + except KeyError: + public, validated_data['public'] = True, True + + prefix_instance = Prefix.objects.create(**validated_data, created=timezone.now()) + + if public is False: + create_permissions_for_prefix(prefix_instance) + prefix_instance.save() + return prefix_instance + + @transaction.atomic + def update(self, validated_data): + """Update function for Prefix + + Updates an existing Prefix instance based on the validated data. + + It checks the ownership before applying changes, updates the Prefix's public status, + and manages user permissions accordingly. + + Parameters: + - instance (Prefix): The Prefix instance to update. + - validated_data (dict): The data that has passed validation checks. + + Returns: + - Prefix: The updated Prefix instance. 
+ + Raises: + - PermissionError: If the current user does not own the Prefix. + """ + + prefix_instance = Prefix.objects.get(prefix=validated_data['prefix']) + prefix_name = prefix_instance.prefix + if prefix_instance.owner != validated_data['owner']: + return "denied" + if prefix_instance.public != validated_data['public']: + #TODO: handle adding/deleting permissions for change of public status + # add permissions to public -> private + # Remove permissions to private -> public + prefix_instance.public = validated_data['public'] + old_perms = get_prefix_permissions(prefix_name=prefix_name) + if validated_data['user_permissions'] != old_perms: + update_user_permissions( + prefix_name= prefix_name, + old_perms=old_perms, + new_perms=validated_data['user_permissions'] + ) + + prefix_instance.description = validated_data.get( + 'description', prefix_instance.description + ) + prefix_instance.save() + prefix_object = get_prefix_object(prefix_name) + return prefix_object + +def update_user_permissions(prefix_name:str, new_perms:dict, old_perms:dict): + """ + Update user permissions based on a provided mapping of users to + permissions. Only modifies permissions related to the specified prefix. 
+ + Step 1: Build a list of permissions associated with the prefix + Step 2: Iterate over users to update each user's permissions + Step 3: Determine which permissions to add and which to remove + Step 4: Apply permission updates + """ + + # Build a list of permissions associated with the prefix + prefix_permissions = [] + for perm_type in ["view", "add", "change", "delete", "publish"]: + codename = f"{perm_type}_{prefix_name}" + try: + perm = Permission.objects.get(codename=codename) + prefix_permissions.append(perm) + except Permission.DoesNotExist: + pass + + prefix_permissions_dict = {perm.codename: perm for perm in prefix_permissions} + + # Set of all users mentioned in either new or old perms + all_users = set(new_perms.keys()) | set(old_perms.keys()) + + for username in all_users: + try: + user = User.objects.get(username=username) + # Current permissions from old_perms or empty if not previously set + current_perms = set( + prefix_permissions_dict.get(perm_codename) + for perm_codename in old_perms.get(username, []) + if perm_codename in prefix_permissions_dict + ) + + # New permissions from new_perms or empty if not provided + new_perms_set = set( + prefix_permissions_dict.get(perm_codename) + for perm_codename in new_perms.get(username, []) + if perm_codename in prefix_permissions_dict + ) + + # Determine permissions to add and to remove + perms_to_add = new_perms_set - current_perms + perms_to_remove = current_perms - new_perms_set + + # Apply permission updates + if perms_to_add: + user.user_permissions.add(*perms_to_add) + if perms_to_remove: + user.user_permissions.remove(*perms_to_remove) + + except User.DoesNotExist: + # Optionally handle the case where the user doesn't exist + pass + + +def create_permissions_for_prefix(instance=Prefix): + """Prefix Permission Creation + + Creates permissions for a Prefix if it is not public. Owner is assigned + all permissions and then can add permissions to other users. 
+ + 'view' -> View/download Prefix drafts + 'add' -> create new drafts for Prefix + 'change' -> Change existing drafts for Prefix + 'delete' -> Delete drafts for Prefix + 'publish' -> Publish drafts for Prefix + """ + + try: + for perm in [ "view", "add", "change", "delete", "publish"]: + new_perm = Permission.objects.create( + name="Can " + perm + " BCOs with prefix " + instance.prefix, + content_type=ContentType.objects.get(app_label="prefix", model="prefix"), + codename=perm + "_" + instance.prefix,) + instance.owner.user_permissions.add(new_perm) + + except utils.IntegrityError: + # The permissions already exist. + pass + +def prefix_counter_increment(prefix_instance: Prefix) -> int: + """Prefix Counter Increment + + Simple incrementing function. + Counter for BCO object_id asignment. + """ + + prefix_instance.counter = F('counter') + 1 + prefix_instance.save() + + prefix_instance.refresh_from_db() + + return prefix_instance.counter + +@transaction.atomic +def delete_prefix(prefix_name: str, user: User) -> bool: + """Delete Prefix + + Deletes a prefix and the permissions. + `view` and `delete` permissions are not removed so that existing BCOs can + still be viewed or individually removed. + + 'add' -> create new drafts for Prefix + 'change' -> Change existing drafts for Prefix + 'publish' -> Publish drafts for Prefix + """ + + try: + prefix_instance = Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return f"That prefix, {prefix_name}, does not exist." + + if prefix_instance.owner == user: + prefix_instance.delete() + if prefix_instance.public is False: + for perm in ["add", "change", "publish"]: + try: + Permission.objects.get(codename=f"{perm}_{prefix_name}").delete() + except Permission.DoesNotExist: + pass + return True + + return f"You do not have permissions to delete that prefix, {prefix_name}." 
diff --git a/prefix/urls.py b/prefix/urls.py new file mode 100644 index 00000000..d7fc4a31 --- /dev/null +++ b/prefix/urls.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# prefix/urls.py + +"""Prefix URLs +""" + +from django.urls import path +from prefix.apis import ( + PrefixesCreateApi, + PrefixesDeleteApi, + PrefixesModifyApi, + PrefixGetInfoApi, + PrefixesForUserApi +) + +urlpatterns = [ + path("prefixes/create/", PrefixesCreateApi.as_view()), + path("prefixes/delete/", PrefixesDeleteApi.as_view()), + path("prefixes/modify/", PrefixesModifyApi.as_view()), + path("prefixes/user/", PrefixesForUserApi.as_view()), + path("prefixes/info/", PrefixGetInfoApi.as_view()), +] \ No newline at end of file diff --git a/search/apis.py b/search/apis.py index c1beca12..f55f1b15 100644 --- a/search/apis.py +++ b/search/apis.py @@ -1,86 +1,189 @@ # search/apis.py + import json +from biocompute.models import Bco +from django.db.models import Q +from django.contrib.auth.models import User from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status from rest_framework.permissions import AllowAny from rest_framework.response import Response from rest_framework.views import APIView -from search.selectors import search_db, controled_list +from search.selectors import controled_list, RETURN_VALUES +from search.selectors import RETURN_VALUES as return_values from itertools import chain +from config.services import legacy_api_converter -class SearchObjectsAPI(APIView): +class SearchUsersAPI(APIView): """ - Search the BCODB + Search the BCODB for Users ------------------- + Provides an API endpoint for querying users. This endpoint will eventually + support multiple query parameters for flexible search capabilities, but + currently only allows submission of a single username. 
+ + Example usage with curl: + ```shell + curl -X GET "http://localhost:8000/api/users/search/?username=tester" -H "accept: application/json" + ``` + + This API view is accessible to any user with authentication. + """ + + @swagger_auto_schema( + operation_id="api_users_search", + manual_parameters=[ + openapi.Parameter('username', + openapi.IN_QUERY, + description="Search BCODB for a username.", + type=openapi.TYPE_STRING, + default="tester" + ) + ], + responses={ + 200: "User Found", + 404: "User not found" + }, + tags=["Database Searches"], + ) + + def get(self, request) -> Response: + username = request.GET["username"] + try: + user = User.objects.get(username=username) + return Response(status=status.HTTP_200_OK, data=user.username) + except User.DoesNotExist: + return Response(status=status.HTTP_404_NOT_FOUND, data=username) - Endpoint for use of query string based search. - Four parameters are defined by this API: - 1. contents: Search in the contents of the BCO - 2. prefix: BCO Prefix to search - 3. owner_user: Search by BCO owner - 4. object_id: BCO object_id to search for +class SearchObjectsAPI(APIView): + """ + Search the BCODB for BCOs + + ------------------- + Provides an API endpoint for querying BioCompute Objects (BCOs) based on + various attributes. This endpoint supports multiple query parameters for + flexible search capabilities. - Shell + Example usage with curl: ```shell - curl -X GET "http://localhost:8000/api/objects/?contents=review&prefix=BCO&owner_user=bco_api_user&object_id=DRAFT" -H "accept: application/json" + curl -X GET "http://localhost:8000/api/objects/?contents=review&prefix=BCO&owner=tester&object_id=BCO" -H "accept: application/json" ``` + + This API view is accessible to any user without authentication requirements. 
""" permission_classes = [AllowAny] - auth = openapi.Parameter('test', openapi.IN_QUERY, description="test manual param", type=openapi.TYPE_BOOLEAN) @swagger_auto_schema( + operation_id="api_objects_search", manual_parameters=[ + openapi.Parameter('object_id', + openapi.IN_QUERY, + description="Search BCO Object Identifier, and primary key.", + type=openapi.TYPE_STRING + ), openapi.Parameter('contents', openapi.IN_QUERY, - description="Search in the contents of the BCO", + description="Search in the BCO JSON contents.", type=openapi.TYPE_STRING ), openapi.Parameter('prefix', openapi.IN_QUERY, - description="BCO Prefix to search", + description="BCO Prefix to search for.", type=openapi.TYPE_STRING ), - openapi.Parameter('owner_user', + openapi.Parameter('owner', openapi.IN_QUERY, - description="Search by BCO owner", + description="Search by User Name that 'owns' the object", type=openapi.TYPE_STRING ), - openapi.Parameter('object_id', + openapi.Parameter('authorized_users', + openapi.IN_QUERY, + description="Search by users who have access to the BCO", + type=openapi.TYPE_STRING + ), + openapi.Parameter('state', + openapi.IN_QUERY, + description="State of object. 
REFERENCED, PUBLISHED, DRAFT, and"\ + + "DELETE are currently accepted values", + type=openapi.TYPE_STRING, + default="published" + ), + openapi.Parameter('score', openapi.IN_QUERY, - description="BCO object_id to search for", + description="Score assigned to BCO at the time of publishing."\ + + " Draft objects will not have a score.", + type=openapi.TYPE_STRING + ), + openapi.Parameter('last_update', + openapi.IN_QUERY, + description="Date Time object for the last database change to this"\ + + " object", + type=openapi.TYPE_STRING + ), + openapi.Parameter('access_count', + openapi.IN_QUERY, + description="Then number of times this object has been downloaded or"\ + + " viewed.", type=openapi.TYPE_STRING ) ], responses={ 200: "Search successfull" }, - tags=["BCO Management"], + tags=["Database Searches"], ) def get(self, request) -> Response: - return_values = [ - "contents", - "last_update", - "object_class", - "object_id", - "owner_group", - "owner_user", - "prefix", - "schema", - "state", - ] - search = dict(request.GET) - result = controled_list(request.user) - for query, value in search.items(): - for item in value: - if query == 'owner_user': - filter = f'{query}' - else: - filter = f'{query}__icontains' - result = search_db(filter, item, result) - search_result = chain(result.values(*return_values)) - return Response(status=status.HTTP_200_OK, data={search_result}) + viewable_bcos = controled_list(request.user) + + query = Q() + + for field in return_values: + values = request.GET.getlist(field) + if values: + field_query = Q() + for value in values: + field_query |= Q(**{f'{field}__icontains': value}) + query &= field_query + + return_bco = viewable_bcos.filter(query) + bco_data = chain(return_bco.values(*return_values)) + return Response(status=status.HTTP_200_OK, data=bco_data) + +class DepreciatedSearchObjectsAPI(SearchObjectsAPI): + swagger_schema = None + + def post(self, request) -> Response: + """ + This POST method is deprecated. 
+ Please use GET instead. + """ + viewable_bcos = controled_list(request.user) + data = legacy_api_converter(request.data) + query = Q() + for object in data: + if object["type"] == "mine": + field_query = Q() + field_query |= Q(**{'owner': request.user}) + query &= field_query + if object["type"] == "prefix": + field_query = Q() + if object["search"] == "": + field_query |= Q() + else: + field_query |= Q(**{"prefix": object["search"]}) + query &= field_query + if object["type"] == "bco_id": + field_query = Q() + if object["search"] == "": + field_query |= Q() + else: + field_query |= Q(**{"object_id": object["search"]}) + query &= field_query + return_bco = viewable_bcos.filter(query) + bco_data = chain(return_bco.values(*return_values)) + return Response(status=status.HTTP_200_OK, data=bco_data) \ No newline at end of file diff --git a/search/selectors.py b/search/selectors.py index 68dcb090..cdff15d8 100644 --- a/search/selectors.py +++ b/search/selectors.py @@ -4,56 +4,73 @@ Set of selector functions to handle searching the BCODB """ -from api.models import BCO +from biocompute.models import Bco from django.db.models import QuerySet from django.db.models.query import QuerySet from django.contrib.auth.models import User -from guardian.shortcuts import get_objects_for_user -from itertools import chain -from api.scripts.utilities.UserUtils import UserUtils +from prefix.selectors import get_user_prefixes -return_values = [ - "contents", - "last_update", - "object_class", +RETURN_VALUES = [ "object_id", - "owner_group", - "owner_user", + "contents", "prefix", - "schema", + "owner", + "authorized_users", "state", + "score", + "last_update", + "access_count", ] -def search_db(filter:str, value:str, result:QuerySet)-> QuerySet: - """Search DB - Takes a filter, a value, and a result query set and uses them to return - a more refined query set. 
+def controled_list(user: User) -> QuerySet: + """ + Generates a list of viewable BioCompute Objects (BCOs) based on the user's + permissions and roles. + + This function determines the set of BCOs a user is authorized to view based + on two criteria: + 1. Prefix Permissions: BCOs associated with prefixes for which the user + has 'view_' permissions. + 2. Authorization: BCOs where the user is explicitly listed as an authorized + user. + + The function excludes BCOs in the 'DELETE' state for all users and + additionally excludes BCOs in the 'DRAFT' state for non-authenticated or + anonymous users. + + Parameters: + - user (User): + A User object representing the authenticated user. + + Returns: + - QuerySet: + A Django QuerySet containing the BCOs that the user is authorized to + view. This QuerySet is distinct to ensure no duplicates are included. """ - new_result = result.filter(**{filter: value}) - print(len(result), ': ', len(new_result)) - return new_result + prefix_permissions = get_user_prefixes(user=user) + viewable_prefixes = [ + perm.split("_")[1] for perm in prefix_permissions + if perm.startswith("view_") + ] -def controled_list(user: User) -> QuerySet: - """User Controlled List - Takes a User object and returns a list of accessable BCOs based on their - permissions. 
- """ + if user.username == "AnonymousUser" or user.username == "": + bcos_by_permission = Bco.objects.filter( + prefix__prefix__in=viewable_prefixes).exclude(state="DELETE" + ).exclude(state="DRAFT") + + return bcos_by_permission.distinct() - prefix_list = [] - results_list = BCO.objects.none() - raw_prefixes = UserUtils().prefix_perms_for_user(user_object=user) - for prefix in raw_prefixes : - pre = prefix.split("_")[1] - if pre not in prefix_list and pre != "prefix": - prefix_list.append(pre) - - for prefix in prefix_list: - if user.username == "AnonymousUser" or user.username == "": - bco_list = BCO.objects.filter(prefix=prefix).values().exclude(state="DELETE").exclude(state="DRAFT") - else: - bco_list = BCO.objects.filter(prefix=prefix).values().exclude(state="DELETE") - results_list = results_list | bco_list - - return results_list + bcos_by_permission = Bco.objects.filter( + prefix__prefix__in=viewable_prefixes + ).exclude(state="DELETE") + + bcos_by_authorized = Bco.objects.filter( + authorized_users=user + ).exclude(state="DELETE") + + viewable_bcos = bcos_by_permission | bcos_by_authorized + viewable_bcos = viewable_bcos.distinct() + + return viewable_bcos diff --git a/search/urls.py b/search/urls.py index bf787ac2..a4a887f2 100644 --- a/search/urls.py +++ b/search/urls.py @@ -1,8 +1,10 @@ # search/urls.py from django.urls import path, re_path -from search.apis import SearchObjectsAPI +from search.apis import SearchObjectsAPI, DepreciatedSearchObjectsAPI, SearchUsersAPI urlpatterns = [ - re_path(r'objects/$', SearchObjectsAPI.as_view()), + re_path(r'objects/$', DepreciatedSearchObjectsAPI.as_view()), + re_path(r'objects/search/$', SearchObjectsAPI.as_view()), + re_path(r'users/search/$', SearchUsersAPI.as_view()), ] \ No newline at end of file diff --git a/server.conf b/server.conf deleted file mode 100644 index 209f01d2..00000000 --- a/server.conf +++ /dev/null @@ -1,74 +0,0 @@ -# This is the main server configuration file for the BCO API. 
- -# --- Production and publishing flags --- # -# NOTE: Valid values are True or False (note the capitalization). -[PRODUCTION] -production=False - -# DB Version -[VERSION] -version=23.09 - -# NOTE: Valid values are True or False (note the capitalization). -# Is this a publish-only server? -[PUBLISHONLY] -publishonly=False - -# --- Security settings --- # - -# Create a key for an anonymous public user. -[KEYS] -anon=627626823549f787c3ec763ff687169206626149 - -[DATABASES] -# default settting -path=./db.sqlite3 -# path=/Users/hadleyking/GitHub/biocompute-objects/dev/data/db.sqlite3 - -# Which host names do you want to associate with the server? -# Note: the local hostname (i.e. 127.0.0.1) should come at the end. -[HOSTNAMES] -prod_names=test.portal.biochemistry.gwu.edu,127.0.0.1 -names=127.0.0.1:8000,127.0.0.1 - -# Give the human-readable hostnames -[HRHOSTNAME] -hrnames=BCO Server (Default) - -# The public hostname of the server (i.e. the one to make requests to) -[PUBLICHOSTNAME] -prod_name=https://test.portal.biochemistry.gwu.edu -name=http://127.0.0.1:8000 - -# Who gets to make requests? -[REQUESTS_FROM] -portal=https://test.portal.biochemistry.gwu.edu -local_development_portal=http://127.0.0.1:3000,http://localhost:3000 -public=true - -# --- Namings --- # -# How do you want to name your objects? -[OBJECT_NAMING] -prod_root_uri=https://test.portal.biochemistry.gwu.edu -root_uri=http://127.0.0.1:8000 -prod_uri_regex=root_uri/prefix_(\d+)/(\d+).(\d+) -uri_regex=root_uri/prefix_(\d+)/(\d+).(\d+) - -# --- Requests --- # -# Where are the request templates defined? -[REQUESTS] -folder=../api/request_definitions/ - -# --- Group and Prefix creation --- # -# To enable all users to create a prefix, group creation needs to be enabled. -# When a prefix is created it has assocaited groups created as well. If the -# user does not have permission to creat the needed groups the prefix creation -# will fail. 
-[GROUP_PREFIX] -allow_all_creation=True -allow_group_creation=True -allow_prefix_creation=True - -# Where are the validation templates defined? -# [VALIDATIONS] -# folder=../api/validation_definitions/ \ No newline at end of file diff --git a/sever.conf.example b/sever.conf.example deleted file mode 100644 index 454a7100..00000000 --- a/sever.conf.example +++ /dev/null @@ -1,74 +0,0 @@ -# This is the main server configuration file for the BCO API. - -# --- Production and publishing flags --- # -# NOTE: Valid values are True or False (note the capitalization). -[PRODUCTION] -production=False - -# DB Version -[VERSION] -version=22.11 - -# NOTE: Valid values are True or False (note the capitalization). -# Is this a publish-only server? -[PUBLISHONLY] -publishonly=False - -# --- Security settings --- # - -# Create a key for an anonymous public user. -[KEYS] -anon=627626823549f787c3ec763ff687169206626149 - -[DATABASES] -# default settting -path=./db.sqlite3 -# path=/Users/hadleyking/GitHub/biocompute-objects/dev/data/db.sqlite3 - -# Which host names do you want to associate with the server? -# Note: the local hostname (i.e. 127.0.0.1) should come at the end. -[HOSTNAMES] -prod_names=test.portal.biochemistry.gwu.edu,127.0.0.1 -names=127.0.0.1:8000,127.0.0.1 - -# Give the human-readable hostnames -[HRHOSTNAME] -hrnames=BCO Server (Default) - -# The public hostname of the server (i.e. the one to make requests to) -[PUBLICHOSTNAME] -prod_name=https://test.portal.biochemistry.gwu.edu -name=http://127.0.0.1:8000 - -# Who gets to make requests? -[REQUESTS_FROM] -portal=https://test.portal.biochemistry.gwu.edu -local_development_portal=http://127.0.0.1:3000,http://localhost:3000 -public=true - -# --- Namings --- # -# How do you want to name your objects? 
-[OBJECT_NAMING] -prod_root_uri=https://test.portal.biochemistry.gwu.edu -root_uri=http://127.0.0.1:8000 -prod_uri_regex=root_uri/prefix_(\d+)/(\d+).(\d+) -uri_regex=root_uri/prefix_(\d+)/(\d+).(\d+) - -# --- Requests --- # -# Where are the request templates defined? -[REQUESTS] -folder=../api/request_definitions/ - -# --- Group and Prefix creation --- # -# To enable all users to create a prefix, group creation needs to be enabled. -# When a prefix is created it has assocaited groups created as well. If the -# user does not have permission to creat the needed groups the prefix creation -# will fail. -[GROUP_PREFIX] -allow_all_creation=True -allow_group_creation=True -allow_prefix_creation=True - -# Where are the validation templates defined? -# [VALIDATIONS] -# folder=../api/validation_definitions/ \ No newline at end of file diff --git a/tests/fixtures/bco_dump.json b/tests/fixtures/bco_dump.json new file mode 100644 index 00000000..16722d66 --- /dev/null +++ b/tests/fixtures/bco_dump.json @@ -0,0 +1,5216 @@ +[ + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto 
Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, + { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + 
"Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": 
"http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. 
One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. 
Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": 
"https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google 
Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": 
"tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": 
[ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
+ ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], 
+ "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": 
"2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + 
"step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": 
"<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": 
{ + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": 
"George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
+ }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": 
"Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google 
Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": 
"IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 13, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 14, + "fields": { + "contents": { + "object_id": "", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"da75a2c36dd6bf449d1f7b150197096e11c51812", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2023-09-05T18:10:23", + "modified": "2023-09-05T18:10:23.167Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "DRAFT", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2023-09-24T09:16:04.123Z" + } + } +] \ No newline at end of file diff --git a/tests/fixtures/old_test_data.json b/tests/fixtures/old_test_data.json new file mode 100644 index 00000000..eeb429b7 --- /dev/null +++ b/tests/fixtures/old_test_data.json @@ -0,0 +1,7030 @@ +[ + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2022-06-28T23:06:35.693Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 2, + "fields": { + "action_time": "2022-06-28T23:08:10.571Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 3, + "fields": { + "action_time": "2022-06-28T23:09:47.922Z", + "user": 6, + "content_type": 10, + "object_id": "1", + "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "action_flag": 2, + 
"change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 4, + "fields": { + "action_time": "2022-06-28T23:12:37.828Z", + "user": 6, + "content_type": 10, + "object_id": "2", + "object_repr": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 5, + "fields": { + "action_time": "2022-06-28T23:14:01.431Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 6, + "fields": { + "action_time": "2022-06-28T23:16:50.236Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 7, + "fields": { + "action_time": "2022-06-28T23:19:25.710Z", + "user": 6, + "content_type": 10, + "object_id": "3", + "object_repr": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 8, + "fields": { + "action_time": "2022-06-28T23:21:05.713Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 9, + "fields": { + "action_time": "2022-06-28T23:21:43.425Z", + "user": 6, + "content_type": 10, + "object_id": "4", + "object_repr": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + 
"pk": 10, + "fields": { + "action_time": "2022-06-28T23:23:00.080Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 11, + "fields": { + "action_time": "2022-06-28T23:23:13.087Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 12, + "fields": { + "action_time": "2022-06-28T23:41:21.155Z", + "user": 6, + "content_type": 10, + "object_id": "6", + "object_repr": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 13, + "fields": { + "action_time": "2022-06-28T23:43:57.562Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 14, + "fields": { + "action_time": "2022-06-28T23:44:43.690Z", + "user": 6, + "content_type": 10, + "object_id": "5", + "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 15, + "fields": { + "action_time": "2023-01-13T13:24:15.001Z", + "user": 6, + "content_type": 4, + "object_id": "6", + "object_repr": "bco_api_user", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Email address\"]}}]" + } + }, + { + "model": "admin.logentry", + "pk": 16, + "fields": { + "action_time": "2023-01-13T13:24:24.242Z", + "user": 6, + "content_type": 4, + "object_id": 
"6", + "object_repr": "bco_api_user", + "action_flag": 2, + "change_message": "[]" + } + }, + { + "model": "authentication.authentication", + "pk": 1, + "fields": { + "username": "bco_api_user", + "auth_service": [ + { + "iss": "Reeya1", + "sub": "ReeyaGupta1" + } + ] + } + }, + { + "model": "auth.permission", + "pk": 1, + "fields": { + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" + } + }, + { + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + "pk": 6, + "fields": { + "name": "Can change permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + "codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, + "fields": { + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + 
"fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + "codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add 
Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": "Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add new_users", + "content_type": 9, + "codename": "add_new_users" + } + }, + { + "model": "auth.permission", + "pk": 34, + "fields": { + "name": "Can change new_users", + "content_type": 9, + "codename": "change_new_users" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete new_users", + "content_type": 9, + "codename": "delete_new_users" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view new_users", + "content_type": 9, + "codename": "view_new_users" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add bco", + "content_type": 10, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change bco", + "content_type": 10, + "codename": 
"change_bco" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete bco", + "content_type": 10, + "codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view bco", + "content_type": 10, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add prefix_table", + "content_type": 11, + "codename": "add_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change prefix_table", + "content_type": 11, + "codename": "change_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete prefix_table", + "content_type": 11, + "codename": "delete_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + "name": "Can view prefix_table", + "content_type": 11, + "codename": "view_prefix_table" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add group info", + "content_type": 12, + "codename": "add_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change group info", + "content_type": 12, + "codename": "change_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete group info", + "content_type": 12, + "codename": "delete_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view group info", + "content_type": 12, + "codename": "view_groupinfo" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": "add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + 
"codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can add BCOs with prefix BCO", + "content_type": 10, + "codename": "add_BCO" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can change BCOs with prefix BCO", + "content_type": 10, + "codename": "change_BCO" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can delete BCOs with prefix BCO", + "content_type": 10, + "codename": "delete_BCO" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can view BCOs with prefix BCO", + "content_type": 10, + "codename": "view_BCO" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can draft BCOs with prefix BCO", + "content_type": 10, + "codename": "draft_BCO" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can publish BCOs with prefix BCO", + "content_type": 10, + "codename": "publish_BCO" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can add group object permission", + "content_type": 14, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can change group object permission", + "content_type": 14, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can delete group object permission", + "content_type": 14, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can view group object permission", + "content_type": 14, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can add user object permission", + 
"content_type": 15, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can change user object permission", + "content_type": 15, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can delete user object permission", + "content_type": 15, + "codename": "delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can view user object permission", + "content_type": 15, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can add BCOs with prefix TEST", + "content_type": 10, + "codename": "add_TEST" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can change BCOs with prefix TEST", + "content_type": 10, + "codename": "change_TEST" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + "name": "Can delete BCOs with prefix TEST", + "content_type": 10, + "codename": "delete_TEST" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can view BCOs with prefix TEST", + "content_type": 10, + "codename": "view_TEST" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can draft BCOs with prefix TEST", + "content_type": 10, + "codename": "draft_TEST" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can publish BCOs with prefix TEST", + "content_type": 10, + "codename": "publish_TEST" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can add BCOs with prefix OTHER", + "content_type": 10, + "codename": "add_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 74, + "fields": { + "name": "Can change BCOs with prefix OTHER", + "content_type": 10, + "codename": "change_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 75, + "fields": { + "name": "Can 
delete BCOs with prefix OTHER", + "content_type": 10, + "codename": "delete_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 76, + "fields": { + "name": "Can view BCOs with prefix OTHER", + "content_type": 10, + "codename": "view_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 77, + "fields": { + "name": "Can draft BCOs with prefix OTHER", + "content_type": 10, + "codename": "draft_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 78, + "fields": { + "name": "Can publish BCOs with prefix OTHER", + "content_type": 10, + "codename": "publish_OTHER" + } + }, + { + "model": "auth.permission", + "pk": 79, + "fields": { + "name": "Can add blacklisted token", + "content_type": 16, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 80, + "fields": { + "name": "Can change blacklisted token", + "content_type": 16, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 81, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 16, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 82, + "fields": { + "name": "Can view blacklisted token", + "content_type": 16, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 83, + "fields": { + "name": "Can add authentication", + "content_type": 17, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 84, + "fields": { + "name": "Can change authentication", + "content_type": 17, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 85, + "fields": { + "name": "Can delete authentication", + "content_type": 17, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 86, + "fields": { + "name": "Can view authentication", + "content_type": 17, + "codename": "view_authentication" + } + }, + { + "model": "authtoken.token", + "pk": 
"07801a1a4cdbf1945e22ac8439f1db27fe813f7a", + "fields": { + "user": 6, + "created": "2022-05-10T20:35:53.548Z" + } + }, + { + "model": "authtoken.token", + "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "fields": { + "user": 8, + "created": "2022-05-10T20:53:42.503Z" + } + }, + { + "model": "authtoken.token", + "pk": "166c6a8f7e6e34827f4231a37c73f4ff985b43a2", + "fields": { + "user": 5, + "created": "2022-05-10T20:35:14.846Z" + } + }, + { + "model": "authtoken.token", + "pk": "2f2a599026581c158a07f968c56292c77f4be875", + "fields": { + "user": 2, + "created": "2022-05-10T20:35:14.509Z" + } + }, + { + "model": "authtoken.token", + "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "fields": { + "user": 9, + "created": "2022-05-10T20:54:44.797Z" + } + }, + { + "model": "authtoken.token", + "pk": "627626823549f787c3ec763ff687169206626149", + "fields": { + "user": 3, + "created": "2022-05-10T20:35:14.520Z" + } + }, + { + "model": "authtoken.token", + "pk": "8d66642cb77c4cb55af75d0f6c4f2835f805dcaa", + "fields": { + "user": 4, + "created": "2022-05-10T20:35:14.695Z" + } + }, + { + "model": "authtoken.token", + "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "fields": { + "user": 7, + "created": "2022-05-10T20:50:39.096Z" + } + }, + { + "model": "auth.group", + "pk": 1, + "fields": { + "name": "bco_drafter", + "permissions": [ + 53, + 54, + 55, + 57, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 2, + "fields": { + "name": "bco_publisher", + "permissions": [ + 53, + 54, + 55, + 57, + 58, + 56 + ] + } + }, + { + "model": "auth.group", + "pk": 3, + "fields": { + "name": "anon", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 4, + "fields": { + "name": "wheel", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 5, + "fields": { + "name": "group_admins", + "permissions": [ + 9, + 10, + 11, + 12 + ] + } + }, + { + "model": "auth.group", + "pk": 6, + "fields": { + "name": "prefix_admins", + "permissions": [ + 49, + 50, + 51, + 
52 + ] + } + }, + { + "model": "auth.group", + "pk": 7, + "fields": { + "name": "AnonymousUser", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 8, + "fields": { + "name": "bco_api_user", + "permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 9, + "fields": { + "name": "test50", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 10, + "fields": { + "name": "hivelab37", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 11, + "fields": { + "name": "jdoe58", + "permissions": [] + } + }, + { + "model": "auth.group", + "pk": 12, + "fields": { + "name": "test_drafter", + "permissions": [ + 67, + 68, + 69, + 71, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 13, + "fields": { + "name": "test_publisher", + "permissions": [ + 67, + 68, + 69, + 71, + 72, + 70 + ] + } + }, + { + "model": "auth.group", + "pk": 14, + "fields": { + "name": "other_drafter", + "permissions": [ + 73, + 74, + 75, + 77, + 76 + ] + } + }, + { + "model": "auth.group", + "pk": 15, + "fields": { + "name": "other_publisher", + "permissions": [ + 73, + 74, + 75, + 77, + 78, + 76 + ] + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", + "last_login": null, + "is_superuser": false, + "username": "bco_drafter", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.496Z", + "groups": [ + 1 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 2, + "fields": { + "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", + "last_login": null, + "is_superuser": false, + "username": "bco_publisher", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.506Z", + "groups": [ + 1, + 2 + ], + "user_permissions": [ + 53, + 54, + 55, + 
57, + 58, + 56 + ] + } + }, + { + "model": "auth.user", + "pk": 3, + "fields": { + "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", + "last_login": null, + "is_superuser": false, + "username": "anon", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.517Z", + "groups": [ + 3 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", + "last_login": "2023-07-24T14:30:51.334Z", + "is_superuser": true, + "username": "wheel", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.528Z", + "groups": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:35:14.844Z", + "groups": [ + 1, + 2, + 7 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", + "last_login": "2023-01-14T12:21:51.437Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "", + "last_name": "", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2022-05-10T20:35:53Z", + "groups": [ + 1, + 2, + 6, + 8, + 12, + 13, + 14, + 15 + ], + "user_permissions": [ + 73, + 67, + 74, + 68, + 75, + 69, + 77, + 71, + 78, + 72, + 76, + 70 + ] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": 
"pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "test50", + "first_name": "", + "last_name": "", + "email": "test@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39.093Z", + "groups": [ + 1, + 2, + 9, + 12 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 8, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab37", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [ + 1, + 2, + 10 + ], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 9, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe58", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [ + 1, + 2, + 11 + ], + "user_permissions": [] + } + }, + { + "model": "contenttypes.contenttype", + "pk": 1, + "fields": { + "app_label": "admin", + "model": "logentry" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 2, + "fields": { + "app_label": "auth", + "model": "permission" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 3, + "fields": { + "app_label": "auth", + "model": "group" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 4, + "fields": { + "app_label": "auth", + "model": "user" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 5, + "fields": { + "app_label": "contenttypes", + "model": "contenttype" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 6, + "fields": { + "app_label": "sessions", + "model": 
"session" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 7, + "fields": { + "app_label": "authtoken", + "model": "token" + } + }, + { + "model": "contenttypes.contenttype", + "pk": 8, + "fields": { + "app_label": "authtoken", + "model": "tokenproxy" + } + }, + { + "model": "sessions.session", + "pk": "3qd5skoalb5ly4dsuqcb5esqj5j9qhzt", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGK4j:oPTKR7Co3iXf6S_I2K7tHYWcNu2MvGhg3SLWUEGUzsE", + "expire_date": "2023-01-27T13:26:29.682Z" + } + }, + { + "model": "sessions.session", + "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", + "expire_date": "2022-07-12T15:13:16.934Z" + } + }, + { + "model": "sessions.session", + "pk": "860l0sgsts303jiqqkbfbqb1b5drzber", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pEUh2:UTjS-6q1AeDxypKMT47pePYR8tWXEHAwMgYl2r2Hxyw", + "expire_date": "2023-01-22T12:22:28.297Z" + } + }, + { + "model": "sessions.session", + "pk": "a1s21bvi44c07la3bvv30q8l4brogz4a", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1ozlTd:D4Ba4G-L6eSml3IKbwxb0QnLb51bSoCBiJxJc54lJyI", + "expire_date": "2022-12-12T21:15:45.128Z" + } + }, + { + "model": "sessions.session", + "pk": "aa07l502m3oxm2o0frkoiatmmj18v4rq", + "fields": { + "session_data": 
".eJxVjEEOwiAQRe_C2pCRwiAu3fcMBJhBqgaS0q6Md7dNutDtf-_9t_BhXYpfO89-InEVWpx-txjSk-sO6BHqvcnU6jJPUe6KPGiXYyN-3Q7376CEXrZ6AOcAdLIZUJtM0RoeOGcYABHOREEhW5U1XxyhwYS8ydGSMhYokvh8AdKJN94:1qNwaJ:Oh_qoPjlNq2dgzb4RfJ7b0_I2y-thVrhsRf7mqxxJEs", + "expire_date": "2023-08-07T14:30:51.336Z" + } + }, + { + "model": "sessions.session", + "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", + "expire_date": "2022-05-24T20:46:02.514Z" + } + }, + { + "model": "sessions.session", + "pk": "cjhktkzqh9exi5w3ys7fqkwvhmylt5rg", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pFco8:V4jc9iYIUck8A4V8iBucgsTCAdddFirKT2xPXWjJjPw", + "expire_date": "2023-01-25T15:14:28.952Z" + } + }, + { + "model": "sessions.session", + "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", + "expire_date": "2022-05-24T20:54:17.321Z" + } + }, + { + "model": "sessions.session", + "pk": "e1an9x1y34jwxw2m7x6gj989vfge552q", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGK1i:AVY30CitrKS74D_bd_s3QRH3bbGVuZjvLMgphfALevg", + "expire_date": "2023-01-27T13:23:22.911Z" + } + }, + { + "model": "sessions.session", + "pk": "efp83ves8qp00gawsisfoz7qk4xzne97", + "fields": { + "session_data": 
".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGKDF:uLjZXoiMuoii6tEGiAGqeWq8PdfHBHAZNX9gj11RfC8", + "expire_date": "2023-01-27T13:35:17.624Z" + } + }, + { + "model": "sessions.session", + "pk": "h4m4295sj843xvqmfajlds3dkvuq1um0", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGfXj:WcrCO7Drey9jEUGWdxJWuH9KiEiYrYx-7GiD0IdXPZU", + "expire_date": "2023-01-28T12:21:51.438Z" + } + }, + { + "model": "sessions.session", + "pk": "hcfujqufa0xy2alniz38tke3dqm9eqof", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pFeup:24vcZq-yo1W12kLp1aKc78gPX7Ei9bzFG-cQ9_dwnC8", + "expire_date": "2023-01-25T17:29:31.162Z" + } + }, + { + "model": "sessions.session", + "pk": "lout2mwzatqi98yxtlt87gsfy9zrha63", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1olDuY:YIHthO6KrZicQixjS7_vlWVZK2C0zwDtrvKZK0X60YI", + "expire_date": "2022-11-02T18:35:26.147Z" + } + }, + { + "model": "sessions.session", + "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", + "expire_date": "2022-07-12T23:05:37.317Z" + } + }, + { + "model": "sessions.session", + "pk": "zpzeih1sxl2o0krggucjc7ta0jiwvdwf", + "fields": { + "session_data": 
".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1otEuA:P-lliBdzVvpKmP6zYqrZRWQ-ueoPUC9JRAI6JViGalY", + "expire_date": "2022-11-24T21:16:10.253Z" + } + }, + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. 
" + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", 
+ "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, + { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + 
"etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL 
example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + 
"output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": 
"FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": 
[ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ 
+ { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": 
"2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "owner_group": 
"bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. 
While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). 
Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + 
"input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + 
"contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. 
default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": 
"molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + 
"contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
+ ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], 
+ "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": 
"2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + 
"step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": 
"<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": 
{ + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": 
"George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
+ }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": 
"Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google 
Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": 
"IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 13, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 14, + "fields": { + "contents": { + "object_id": "", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"da75a2c36dd6bf449d1f7b150197096e11c51812", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2023-09-05T18:10:23", + "modified": "2023-09-05T18:10:23.167Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "DRAFT", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2023-09-24T09:16:04.123Z" + } + }, + { + "model": "api.groupinfo", + "pk": 1, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Group administrators", + "expiration": null, + "group": "group_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 2, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Prefix administrators", + "expiration": null, + "group": "prefix_admins", + "max_n_members": -1, + "owner_user": "wheel" + } + }, + { + "model": "api.groupinfo", + "pk": 3, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 4, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just a test prefix.", + "expiration": null, + "group": "test_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 5, + "fields": { + "delete_members_on_group_deletion": 
false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_drafter", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.groupinfo", + "pk": 6, + "fields": { + "delete_members_on_group_deletion": false, + "description": "Just an other prefix.", + "expiration": null, + "group": "other_publisher", + "max_n_members": -1, + "owner_user": "bco_api_user" + } + }, + { + "model": "api.prefix_table", + "pk": 1, + "fields": { + "n_objects": 8, + "prefix": "BCO" + } + }, + { + "model": "api.prefix_table", + "pk": 2, + "fields": { + "n_objects": 3, + "prefix": "TEST" + } + }, + { + "model": "api.prefix_table", + "pk": 3, + "fields": { + "n_objects": 3, + "prefix": "OTHER" + } + }, + { + "model": "api.prefix", + "pk": 1, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T20:35:14.712Z", + "created_by": "bco_publisher", + "description": null, + "expires": null, + "owner_group": "bco_publisher", + "owner_user": "bco_publisher", + "prefix": "BCO" + } + }, + { + "model": "api.prefix", + "pk": 2, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:32.633Z", + "created_by": "bco_api_user", + "description": "Just a test prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "TEST" + } + }, + { + "model": "api.prefix", + "pk": 3, + "fields": { + "certifying_server": null, + "certifying_key": null, + "created": "2022-05-10T21:48:35.104Z", + "created_by": "bco_api_user", + "description": "Just an other prefix.", + "expires": null, + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER" + } + }, + { + "model": "authentication.authentication", + "pk": 1, + "fields": { + "username": "bco_api_user", + "auth_service": [ + { + "iss": "Reeya1", + "sub": "ReeyaGupta1" + }, + { + "iss": "string", + "sub": "string" + } + ] + } + } +] \ No newline at end of file diff 
--git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index eeb429b7..e293db5d 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -1,7030 +1,4319 @@ [ - { - "model": "admin.logentry", - "pk": 1, - "fields": { - "action_time": "2022-06-28T23:06:35.693Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 2, - "fields": { - "action_time": "2022-06-28T23:08:10.571Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 3, - "fields": { - "action_time": "2022-06-28T23:09:47.922Z", - "user": 6, - "content_type": 10, - "object_id": "1", - "object_repr": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 4, - "fields": { - "action_time": "2022-06-28T23:12:37.828Z", - "user": 6, - "content_type": 10, - "object_id": "2", - "object_repr": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 5, - "fields": { - "action_time": "2022-06-28T23:14:01.431Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 6, - "fields": { - "action_time": "2022-06-28T23:16:50.236Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", - 
"action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 7, - "fields": { - "action_time": "2022-06-28T23:19:25.710Z", - "user": 6, - "content_type": 10, - "object_id": "3", - "object_repr": "http://127.0.0.1:8000/BCO_000002/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 8, - "fields": { - "action_time": "2022-06-28T23:21:05.713Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "http://127.0.0.1:8000/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 9, - "fields": { - "action_time": "2022-06-28T23:21:43.425Z", - "user": 6, - "content_type": 10, - "object_id": "4", - "object_repr": "http://127.0.0.1:8000/BCO_000003/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 10, - "fields": { - "action_time": "2022-06-28T23:23:00.080Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "http://127.0.0.1:8000/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 11, - "fields": { - "action_time": "2022-06-28T23:23:13.087Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Owner group\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 12, - "fields": { - "action_time": "2022-06-28T23:41:21.155Z", - "user": 6, - "content_type": 10, - "object_id": "6", - "object_repr": "http://127.0.0.1:8000/OTHER_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - 
"model": "admin.logentry", - "pk": 13, - "fields": { - "action_time": "2022-06-28T23:43:57.562Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 14, - "fields": { - "action_time": "2022-06-28T23:44:43.690Z", - "user": 6, - "content_type": 10, - "object_id": "5", - "object_repr": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Contents\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 15, - "fields": { - "action_time": "2023-01-13T13:24:15.001Z", - "user": 6, - "content_type": 4, - "object_id": "6", - "object_repr": "bco_api_user", - "action_flag": 2, - "change_message": "[{\"changed\": {\"fields\": [\"Email address\"]}}]" - } - }, - { - "model": "admin.logentry", - "pk": 16, - "fields": { - "action_time": "2023-01-13T13:24:24.242Z", - "user": 6, - "content_type": 4, - "object_id": "6", - "object_repr": "bco_api_user", - "action_flag": 2, - "change_message": "[]" - } - }, - { - "model": "authentication.authentication", - "pk": 1, - "fields": { - "username": "bco_api_user", - "auth_service": [ - { - "iss": "Reeya1", - "sub": "ReeyaGupta1" - } - ] - } - }, - { - "model": "auth.permission", - "pk": 1, - "fields": { - "name": "Can add log entry", - "content_type": 1, - "codename": "add_logentry" - } - }, - { - "model": "auth.permission", - "pk": 2, - "fields": { - "name": "Can change log entry", - "content_type": 1, - "codename": "change_logentry" - } - }, - { - "model": "auth.permission", - "pk": 3, - "fields": { - "name": "Can delete log entry", - "content_type": 1, - "codename": "delete_logentry" - } - }, - { - "model": "auth.permission", - "pk": 4, - "fields": { - "name": "Can view log entry", - "content_type": 1, - "codename": "view_logentry" - } - }, - { - "model": "auth.permission", - 
"pk": 5, - "fields": { - "name": "Can add permission", - "content_type": 2, - "codename": "add_permission" - } - }, - { - "model": "auth.permission", - "pk": 6, - "fields": { - "name": "Can change permission", - "content_type": 2, - "codename": "change_permission" - } - }, - { - "model": "auth.permission", - "pk": 7, - "fields": { - "name": "Can delete permission", - "content_type": 2, - "codename": "delete_permission" - } - }, - { - "model": "auth.permission", - "pk": 8, - "fields": { - "name": "Can view permission", - "content_type": 2, - "codename": "view_permission" - } - }, - { - "model": "auth.permission", - "pk": 9, - "fields": { - "name": "Can add group", - "content_type": 3, - "codename": "add_group" - } - }, - { - "model": "auth.permission", - "pk": 10, - "fields": { - "name": "Can change group", - "content_type": 3, - "codename": "change_group" - } - }, - { - "model": "auth.permission", - "pk": 11, - "fields": { - "name": "Can delete group", - "content_type": 3, - "codename": "delete_group" - } - }, - { - "model": "auth.permission", - "pk": 12, - "fields": { - "name": "Can view group", - "content_type": 3, - "codename": "view_group" - } - }, - { - "model": "auth.permission", - "pk": 13, - "fields": { - "name": "Can add user", - "content_type": 4, - "codename": "add_user" - } - }, - { - "model": "auth.permission", - "pk": 14, - "fields": { - "name": "Can change user", - "content_type": 4, - "codename": "change_user" - } - }, - { - "model": "auth.permission", - "pk": 15, - "fields": { - "name": "Can delete user", - "content_type": 4, - "codename": "delete_user" - } - }, - { - "model": "auth.permission", - "pk": 16, - "fields": { - "name": "Can view user", - "content_type": 4, - "codename": "view_user" - } - }, - { - "model": "auth.permission", - "pk": 17, - "fields": { - "name": "Can add content type", - "content_type": 5, - "codename": "add_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 18, - "fields": { - "name": "Can change content 
type", - "content_type": 5, - "codename": "change_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 19, - "fields": { - "name": "Can delete content type", - "content_type": 5, - "codename": "delete_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 20, - "fields": { - "name": "Can view content type", - "content_type": 5, - "codename": "view_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 21, - "fields": { - "name": "Can add session", - "content_type": 6, - "codename": "add_session" - } - }, - { - "model": "auth.permission", - "pk": 22, - "fields": { - "name": "Can change session", - "content_type": 6, - "codename": "change_session" - } - }, - { - "model": "auth.permission", - "pk": 23, - "fields": { - "name": "Can delete session", - "content_type": 6, - "codename": "delete_session" - } - }, - { - "model": "auth.permission", - "pk": 24, - "fields": { - "name": "Can view session", - "content_type": 6, - "codename": "view_session" - } - }, - { - "model": "auth.permission", - "pk": 25, - "fields": { - "name": "Can add Token", - "content_type": 7, - "codename": "add_token" - } - }, - { - "model": "auth.permission", - "pk": 26, - "fields": { - "name": "Can change Token", - "content_type": 7, - "codename": "change_token" - } - }, - { - "model": "auth.permission", - "pk": 27, - "fields": { - "name": "Can delete Token", - "content_type": 7, - "codename": "delete_token" - } - }, - { - "model": "auth.permission", - "pk": 28, - "fields": { - "name": "Can view Token", - "content_type": 7, - "codename": "view_token" - } - }, - { - "model": "auth.permission", - "pk": 29, - "fields": { - "name": "Can add token", - "content_type": 8, - "codename": "add_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 30, - "fields": { - "name": "Can change token", - "content_type": 8, - "codename": "change_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 31, - "fields": { - "name": "Can delete token", - "content_type": 8, - 
"codename": "delete_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 32, - "fields": { - "name": "Can view token", - "content_type": 8, - "codename": "view_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 33, - "fields": { - "name": "Can add new_users", - "content_type": 9, - "codename": "add_new_users" - } - }, - { - "model": "auth.permission", - "pk": 34, - "fields": { - "name": "Can change new_users", - "content_type": 9, - "codename": "change_new_users" - } - }, - { - "model": "auth.permission", - "pk": 35, - "fields": { - "name": "Can delete new_users", - "content_type": 9, - "codename": "delete_new_users" - } - }, - { - "model": "auth.permission", - "pk": 36, - "fields": { - "name": "Can view new_users", - "content_type": 9, - "codename": "view_new_users" - } - }, - { - "model": "auth.permission", - "pk": 37, - "fields": { - "name": "Can add bco", - "content_type": 10, - "codename": "add_bco" - } - }, - { - "model": "auth.permission", - "pk": 38, - "fields": { - "name": "Can change bco", - "content_type": 10, - "codename": "change_bco" - } - }, - { - "model": "auth.permission", - "pk": 39, - "fields": { - "name": "Can delete bco", - "content_type": 10, - "codename": "delete_bco" - } - }, - { - "model": "auth.permission", - "pk": 40, - "fields": { - "name": "Can view bco", - "content_type": 10, - "codename": "view_bco" - } - }, - { - "model": "auth.permission", - "pk": 41, - "fields": { - "name": "Can add prefix_table", - "content_type": 11, - "codename": "add_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 42, - "fields": { - "name": "Can change prefix_table", - "content_type": 11, - "codename": "change_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 43, - "fields": { - "name": "Can delete prefix_table", - "content_type": 11, - "codename": "delete_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 44, - "fields": { - "name": "Can view prefix_table", - "content_type": 11, - "codename": 
"view_prefix_table" - } - }, - { - "model": "auth.permission", - "pk": 45, - "fields": { - "name": "Can add group info", - "content_type": 12, - "codename": "add_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 46, - "fields": { - "name": "Can change group info", - "content_type": 12, - "codename": "change_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 47, - "fields": { - "name": "Can delete group info", - "content_type": 12, - "codename": "delete_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 48, - "fields": { - "name": "Can view group info", - "content_type": 12, - "codename": "view_groupinfo" - } - }, - { - "model": "auth.permission", - "pk": 49, - "fields": { - "name": "Can add prefix", - "content_type": 13, - "codename": "add_prefix" - } - }, - { - "model": "auth.permission", - "pk": 50, - "fields": { - "name": "Can change prefix", - "content_type": 13, - "codename": "change_prefix" - } - }, - { - "model": "auth.permission", - "pk": 51, - "fields": { - "name": "Can delete prefix", - "content_type": 13, - "codename": "delete_prefix" - } - }, - { - "model": "auth.permission", - "pk": 52, - "fields": { - "name": "Can view prefix", - "content_type": 13, - "codename": "view_prefix" - } - }, - { - "model": "auth.permission", - "pk": 53, - "fields": { - "name": "Can add BCOs with prefix BCO", - "content_type": 10, - "codename": "add_BCO" - } - }, - { - "model": "auth.permission", - "pk": 54, - "fields": { - "name": "Can change BCOs with prefix BCO", - "content_type": 10, - "codename": "change_BCO" - } - }, - { - "model": "auth.permission", - "pk": 55, - "fields": { - "name": "Can delete BCOs with prefix BCO", - "content_type": 10, - "codename": "delete_BCO" - } - }, - { - "model": "auth.permission", - "pk": 56, - "fields": { - "name": "Can view BCOs with prefix BCO", - "content_type": 10, - "codename": "view_BCO" - } - }, - { - "model": "auth.permission", - "pk": 57, - "fields": { - "name": "Can draft BCOs with prefix BCO", 
- "content_type": 10, - "codename": "draft_BCO" - } - }, - { - "model": "auth.permission", - "pk": 58, - "fields": { - "name": "Can publish BCOs with prefix BCO", - "content_type": 10, - "codename": "publish_BCO" - } - }, - { - "model": "auth.permission", - "pk": 59, - "fields": { - "name": "Can add group object permission", - "content_type": 14, - "codename": "add_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 60, - "fields": { - "name": "Can change group object permission", - "content_type": 14, - "codename": "change_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 61, - "fields": { - "name": "Can delete group object permission", - "content_type": 14, - "codename": "delete_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 62, - "fields": { - "name": "Can view group object permission", - "content_type": 14, - "codename": "view_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 63, - "fields": { - "name": "Can add user object permission", - "content_type": 15, - "codename": "add_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 64, - "fields": { - "name": "Can change user object permission", - "content_type": 15, - "codename": "change_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 65, - "fields": { - "name": "Can delete user object permission", - "content_type": 15, - "codename": "delete_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 66, - "fields": { - "name": "Can view user object permission", - "content_type": 15, - "codename": "view_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 67, - "fields": { - "name": "Can add BCOs with prefix TEST", - "content_type": 10, - "codename": "add_TEST" - } - }, - { - "model": "auth.permission", - "pk": 68, - "fields": { - "name": "Can change BCOs with prefix TEST", - "content_type": 10, - "codename": "change_TEST" - } - }, - { - 
"model": "auth.permission", - "pk": 69, - "fields": { - "name": "Can delete BCOs with prefix TEST", - "content_type": 10, - "codename": "delete_TEST" - } - }, - { - "model": "auth.permission", - "pk": 70, - "fields": { - "name": "Can view BCOs with prefix TEST", - "content_type": 10, - "codename": "view_TEST" - } - }, - { - "model": "auth.permission", - "pk": 71, - "fields": { - "name": "Can draft BCOs with prefix TEST", - "content_type": 10, - "codename": "draft_TEST" - } - }, - { - "model": "auth.permission", - "pk": 72, - "fields": { - "name": "Can publish BCOs with prefix TEST", - "content_type": 10, - "codename": "publish_TEST" - } - }, - { - "model": "auth.permission", - "pk": 73, - "fields": { - "name": "Can add BCOs with prefix OTHER", - "content_type": 10, - "codename": "add_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 74, - "fields": { - "name": "Can change BCOs with prefix OTHER", - "content_type": 10, - "codename": "change_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 75, - "fields": { - "name": "Can delete BCOs with prefix OTHER", - "content_type": 10, - "codename": "delete_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 76, - "fields": { - "name": "Can view BCOs with prefix OTHER", - "content_type": 10, - "codename": "view_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 77, - "fields": { - "name": "Can draft BCOs with prefix OTHER", - "content_type": 10, - "codename": "draft_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 78, - "fields": { - "name": "Can publish BCOs with prefix OTHER", - "content_type": 10, - "codename": "publish_OTHER" - } - }, - { - "model": "auth.permission", - "pk": 79, - "fields": { - "name": "Can add blacklisted token", - "content_type": 16, - "codename": "add_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 80, - "fields": { - "name": "Can change blacklisted token", - "content_type": 16, - "codename": "change_blacklistedtoken" - } - }, - { - "model": 
"auth.permission", - "pk": 81, - "fields": { - "name": "Can delete blacklisted token", - "content_type": 16, - "codename": "delete_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 82, - "fields": { - "name": "Can view blacklisted token", - "content_type": 16, - "codename": "view_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 83, - "fields": { - "name": "Can add authentication", - "content_type": 17, - "codename": "add_authentication" - } - }, - { - "model": "auth.permission", - "pk": 84, - "fields": { - "name": "Can change authentication", - "content_type": 17, - "codename": "change_authentication" - } - }, - { - "model": "auth.permission", - "pk": 85, - "fields": { - "name": "Can delete authentication", - "content_type": 17, - "codename": "delete_authentication" - } - }, - { - "model": "auth.permission", - "pk": 86, - "fields": { - "name": "Can view authentication", - "content_type": 17, - "codename": "view_authentication" - } - }, - { - "model": "authtoken.token", - "pk": "07801a1a4cdbf1945e22ac8439f1db27fe813f7a", + { + "model": "auth.permission", + "pk": 1, "fields": { - "user": 6, - "created": "2022-05-10T20:35:53.548Z" + "name": "Can add log entry", + "content_type": 1, + "codename": "add_logentry" } }, { - "model": "authtoken.token", - "pk": "0bd55c955fcbfc269f6dc8f61ea107674cafdecb", + "model": "auth.permission", + "pk": 2, + "fields": { + "name": "Can change log entry", + "content_type": 1, + "codename": "change_logentry" + } + }, + { + "model": "auth.permission", + "pk": 3, + "fields": { + "name": "Can delete log entry", + "content_type": 1, + "codename": "delete_logentry" + } + }, + { + "model": "auth.permission", + "pk": 4, + "fields": { + "name": "Can view log entry", + "content_type": 1, + "codename": "view_logentry" + } + }, + { + "model": "auth.permission", + "pk": 5, + "fields": { + "name": "Can add permission", + "content_type": 2, + "codename": "add_permission" + } + }, + { + "model": "auth.permission", + 
"pk": 6, + "fields": { + "name": "Can change permission", + "content_type": 2, + "codename": "change_permission" + } + }, + { + "model": "auth.permission", + "pk": 7, + "fields": { + "name": "Can delete permission", + "content_type": 2, + "codename": "delete_permission" + } + }, + { + "model": "auth.permission", + "pk": 8, + "fields": { + "name": "Can view permission", + "content_type": 2, + "codename": "view_permission" + } + }, + { + "model": "auth.permission", + "pk": 9, "fields": { - "user": 8, - "created": "2022-05-10T20:53:42.503Z" + "name": "Can add group", + "content_type": 3, + "codename": "add_group" + } + }, + { + "model": "auth.permission", + "pk": 10, + "fields": { + "name": "Can change group", + "content_type": 3, + "codename": "change_group" + } + }, + { + "model": "auth.permission", + "pk": 11, + "fields": { + "name": "Can delete group", + "content_type": 3, + "codename": "delete_group" + } + }, + { + "model": "auth.permission", + "pk": 12, + "fields": { + "name": "Can view group", + "content_type": 3, + "codename": "view_group" + } + }, + { + "model": "auth.permission", + "pk": 13, + "fields": { + "name": "Can add user", + "content_type": 4, + "codename": "add_user" + } + }, + { + "model": "auth.permission", + "pk": 14, + "fields": { + "name": "Can change user", + "content_type": 4, + "codename": "change_user" + } + }, + { + "model": "auth.permission", + "pk": 15, + "fields": { + "name": "Can delete user", + "content_type": 4, + "codename": "delete_user" + } + }, + { + "model": "auth.permission", + "pk": 16, + "fields": { + "name": "Can view user", + "content_type": 4, + "codename": "view_user" + } + }, + { + "model": "auth.permission", + "pk": 17, + "fields": { + "name": "Can add content type", + "content_type": 5, + "codename": "add_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 18, + "fields": { + "name": "Can change content type", + "content_type": 5, + "codename": "change_contenttype" + } + }, + { + "model": "auth.permission", 
+ "pk": 19, + "fields": { + "name": "Can delete content type", + "content_type": 5, + "codename": "delete_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 20, + "fields": { + "name": "Can view content type", + "content_type": 5, + "codename": "view_contenttype" + } + }, + { + "model": "auth.permission", + "pk": 21, + "fields": { + "name": "Can add session", + "content_type": 6, + "codename": "add_session" + } + }, + { + "model": "auth.permission", + "pk": 22, + "fields": { + "name": "Can change session", + "content_type": 6, + "codename": "change_session" + } + }, + { + "model": "auth.permission", + "pk": 23, + "fields": { + "name": "Can delete session", + "content_type": 6, + "codename": "delete_session" + } + }, + { + "model": "auth.permission", + "pk": 24, + "fields": { + "name": "Can view session", + "content_type": 6, + "codename": "view_session" + } + }, + { + "model": "auth.permission", + "pk": 25, + "fields": { + "name": "Can add Token", + "content_type": 7, + "codename": "add_token" + } + }, + { + "model": "auth.permission", + "pk": 26, + "fields": { + "name": "Can change Token", + "content_type": 7, + "codename": "change_token" + } + }, + { + "model": "auth.permission", + "pk": 27, + "fields": { + "name": "Can delete Token", + "content_type": 7, + "codename": "delete_token" + } + }, + { + "model": "auth.permission", + "pk": 28, + "fields": { + "name": "Can view Token", + "content_type": 7, + "codename": "view_token" + } + }, + { + "model": "auth.permission", + "pk": 29, + "fields": { + "name": "Can add token", + "content_type": 8, + "codename": "add_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 30, + "fields": { + "name": "Can change token", + "content_type": 8, + "codename": "change_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 31, + "fields": { + "name": "Can delete token", + "content_type": 8, + "codename": "delete_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 32, + "fields": { + "name": 
"Can view token", + "content_type": 8, + "codename": "view_tokenproxy" + } + }, + { + "model": "auth.permission", + "pk": 33, + "fields": { + "name": "Can add blacklisted token", + "content_type": 9, + "codename": "add_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 34, + "fields": { + "name": "Can change blacklisted token", + "content_type": 9, + "codename": "change_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 35, + "fields": { + "name": "Can delete blacklisted token", + "content_type": 9, + "codename": "delete_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 36, + "fields": { + "name": "Can view blacklisted token", + "content_type": 9, + "codename": "view_blacklistedtoken" + } + }, + { + "model": "auth.permission", + "pk": 37, + "fields": { + "name": "Can add group object permission", + "content_type": 10, + "codename": "add_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 38, + "fields": { + "name": "Can change group object permission", + "content_type": 10, + "codename": "change_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 39, + "fields": { + "name": "Can delete group object permission", + "content_type": 10, + "codename": "delete_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 40, + "fields": { + "name": "Can view group object permission", + "content_type": 10, + "codename": "view_groupobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 41, + "fields": { + "name": "Can add user object permission", + "content_type": 11, + "codename": "add_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 42, + "fields": { + "name": "Can change user object permission", + "content_type": 11, + "codename": "change_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 43, + "fields": { + "name": "Can delete user object permission", + "content_type": 11, + "codename": 
"delete_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 44, + "fields": { + "name": "Can view user object permission", + "content_type": 11, + "codename": "view_userobjectpermission" + } + }, + { + "model": "auth.permission", + "pk": 45, + "fields": { + "name": "Can add new user", + "content_type": 12, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 46, + "fields": { + "name": "Can change new user", + "content_type": 12, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 47, + "fields": { + "name": "Can delete new user", + "content_type": 12, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 48, + "fields": { + "name": "Can view new user", + "content_type": 12, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 49, + "fields": { + "name": "Can add authentication", + "content_type": 13, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 50, + "fields": { + "name": "Can change authentication", + "content_type": 13, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 51, + "fields": { + "name": "Can delete authentication", + "content_type": 13, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 52, + "fields": { + "name": "Can view authentication", + "content_type": 13, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 53, + "fields": { + "name": "Can view BCOs with prefix NOPUB", + "content_type": 13, + "codename": "view_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can add BCOs with prefix NOPUB", + "content_type": 13, + "codename": "add_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can change BCOs with prefix NOPUB", + "content_type": 13, + "codename": "change_NOPUB" + } + }, + { + "model": 
"auth.permission", + "pk": 56, + "fields": { + "name": "Can delete BCOs with prefix NOPUB", + "content_type": 13, + "codename": "delete_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can publish BCOs with prefix NOPUB", + "content_type": 13, + "codename": "publish_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can add new user", + "content_type": 11, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can change new user", + "content_type": 11, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can delete new user", + "content_type": 11, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can view new user", + "content_type": 11, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can add authentication", + "content_type": 10, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can change authentication", + "content_type": 10, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can delete authentication", + "content_type": 10, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can view authentication", + "content_type": 10, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 66, + "fields": { + "name": "Can add bco", + "content_type": 12, + "codename": "add_bco" + } + }, + { + "model": "auth.permission", + "pk": 67, + "fields": { + "name": "Can change bco", + "content_type": 12, + "codename": "change_bco" + } + }, + { + "model": "auth.permission", + "pk": 68, + "fields": { + "name": "Can delete bco", + "content_type": 12, + 
"codename": "delete_bco" + } + }, + { + "model": "auth.permission", + "pk": 69, + "fields": { + "name": "Can view bco", + "content_type": 12, + "codename": "view_bco" + } + }, + { + "model": "auth.permission", + "pk": 70, + "fields": { + "name": "Can add prefix", + "content_type": 13, + "codename": "add_prefix" + } + }, + { + "model": "auth.permission", + "pk": 71, + "fields": { + "name": "Can change prefix", + "content_type": 13, + "codename": "change_prefix" + } + }, + { + "model": "auth.permission", + "pk": 72, + "fields": { + "name": "Can delete prefix", + "content_type": 13, + "codename": "delete_prefix" + } + }, + { + "model": "auth.permission", + "pk": 73, + "fields": { + "name": "Can view prefix", + "content_type": 13, + "codename": "view_prefix" + } + }, + { + "model": "auth.user", + "pk": 1, + "fields": { + "password": "!Bh8Fg1xZLdW7N3SEpDh5IO2PzJZtsMDEqwHeJn5w", + "last_login": null, + "is_superuser": false, + "username": "AnonymousUser", + "first_name": "", + "last_name": "", + "email": "", + "is_staff": false, + "is_active": true, + "date_joined": "2024-03-14T13:52:22.277Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "tester", + "first_name": "", + "last_name": "", + "email": "tester@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 53 + ] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab", + "first_name": "", + "last_name": "", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": 
"2022-05-10T20:53:42.499Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe", + "first_name": "", + "last_name": "", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 7, + "fields": { + "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", + "last_login": "2024-04-10T22:06:48.972Z", + "is_superuser": true, + "username": "bco_api_user", + "first_name": "", + "last_name": "", + "email": "object.biocompute@gmail.com", + "is_staff": true, + "is_active": true, + "date_joined": "2024-04-03T10:39:01Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 57, + 53 + ] + } + }, + + { + "model": "sessions.session", + "pk": "0gv8wnnng15dgqxaysg8vs194c96ux2x", + "fields": { + "session_data": ".eJxVjEEOwiAQRe_C2hAcKAWX7j0DGZhBqgaS0q6Md7dNutDtf-_9twi4LiWsnecwkbiIUZx-t4jpyXUH9MB6bzK1usxTlLsiD9rlrRG_rof7d1Cwl632Hny2PCqHenAUWWeljIsKjMpOG8WIvPFEDASG0HqfXAQYEHO0Z_H5AuH2ODg:1rug5g:BEIA7KC4h5LRM3Nqo-163CC0oUR-Fw_QdB3aMV0UAaY", + "expire_date": "2024-04-24T22:06:48.974Z" } }, { "model": "authtoken.token", - "pk": "166c6a8f7e6e34827f4231a37c73f4ff985b43a2", + "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", "fields": { "user": 5, - "created": "2022-05-10T20:35:14.846Z" + "created": "2024-03-14T15:21:04.318Z" } }, { "model": "authtoken.token", - "pk": "2f2a599026581c158a07f968c56292c77f4be875", + "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", "fields": { - "user": 2, - "created": "2022-05-10T20:35:14.509Z" + "user": 7, + "created": "2024-04-03T10:53:08.951Z" } }, { "model": "authtoken.token", - "pk": "3f5504d88a5085d0452b19350fb6f82ae7097dd0", + "pk": 
"705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", "fields": { - "user": 9, - "created": "2022-05-10T20:54:44.797Z" + "user": 4, + "created": "2024-03-14T15:21:14.996Z" } }, { "model": "authtoken.token", - "pk": "627626823549f787c3ec763ff687169206626149", + "pk": "b8e588c4bdfb366420007827054042e8e594ec51", "fields": { - "user": 3, - "created": "2022-05-10T20:35:14.520Z" + "user": 1, + "created": "2024-03-14T13:53:45.793Z" } }, { "model": "authtoken.token", - "pk": "8d66642cb77c4cb55af75d0f6c4f2835f805dcaa", + "pk": "ba1a932a6af59930293e087c1633fa60927b6690", "fields": { - "user": 4, - "created": "2022-05-10T20:35:14.695Z" + "user": 6, + "created": "2024-03-14T15:21:09.348Z" } }, { - "model": "authtoken.token", - "pk": "c400a6076a2dfe7e9906ab86c6ad4574d1d60e03", + "model": "authentication.authentication", + "pk": 1, "fields": { - "user": 7, - "created": "2022-05-10T20:50:39.096Z" + "username": "bco_api_user", + "auth_service": [ + { + "iss": "Reeya1", + "sub": "ReeyaGupta1" + } + ] + } + }, + { + "model": "authentication.newuser", + "pk": 1, + "fields": { + "email": "test_new_user@testing.com", + "temp_identifier": "sample_temp_identifier", + "token": "token", + "hostname": "http://localhost:8000/", + "created": "2024-03-14T14:28:32Z" + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley 
King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google 
Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": 
"tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 7, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from 
many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 23, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"eb8ac2d04b2d3204b88e0bc6e3a66dcfac4af934c1ebe7ce629f8f584d5f3d7a", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2024-04-11T16:44:51.054Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: 
https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + 
"output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": 
"FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-11T16:44:51.054Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + 
}, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + 
"name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + 
"authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] } }, - { - "model": "auth.group", - "pk": 1, - "fields": { - "name": "bco_drafter", - "permissions": [ - 53, - 54, - 55, - 57, - 56 - ] - } - }, - { - "model": "auth.group", - "pk": 2, - "fields": { - "name": "bco_publisher", - "permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - 
"model": "auth.group", - "pk": 3, - "fields": { - "name": "anon", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 4, - "fields": { - "name": "wheel", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 5, - "fields": { - "name": "group_admins", - "permissions": [ - 9, - 10, - 11, - 12 - ] - } - }, - { - "model": "auth.group", - "pk": 6, - "fields": { - "name": "prefix_admins", - "permissions": [ - 49, - 50, - 51, - 52 - ] - } - }, - { - "model": "auth.group", - "pk": 7, - "fields": { - "name": "AnonymousUser", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 8, - "fields": { - "name": "bco_api_user", - "permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 9, - "fields": { - "name": "test50", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 10, - "fields": { - "name": "hivelab37", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 11, - "fields": { - "name": "jdoe58", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 12, - "fields": { - "name": "test_drafter", - "permissions": [ - 67, - 68, - 69, - 71, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 13, - "fields": { - "name": "test_publisher", - "permissions": [ - 67, - 68, - 69, - 71, - 72, - 70 - ] - } - }, - { - "model": "auth.group", - "pk": 14, - "fields": { - "name": "other_drafter", - "permissions": [ - 73, - 74, - 75, - 77, - 76 - ] - } - }, - { - "model": "auth.group", - "pk": 15, - "fields": { - "name": "other_publisher", - "permissions": [ - 73, - 74, - 75, - 77, - 78, - 76 - ] - } - }, - { - "model": "auth.user", - "pk": 1, - "fields": { - "password": "!i7FmD5oJKoZbSswUfPpd5hHZTO1uUL4M26R2DIzb", - "last_login": null, - "is_superuser": false, - "username": "bco_drafter", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.496Z", - 
"groups": [ - 1 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 2, - "fields": { - "password": "!zwQlrQ6x12cENcNlfEBkImrSqyM1BaC6gZwEdJzm", - "last_login": null, - "is_superuser": false, - "username": "bco_publisher", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.506Z", - "groups": [ - 1, - 2 - ], - "user_permissions": [ - 53, - 54, - 55, - 57, - 58, - 56 - ] - } - }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "!nFpSYz0kD54JC8eO25OIH5sZpPYnjNpYyh5th60k", - "last_login": null, - "is_superuser": false, - "username": "anon", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.517Z", - "groups": [ - 3 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 4, - "fields": { - "password": "pbkdf2_sha256$260000$CYgsYlwKXcRZrLo5HSr4jU$4MmwM6zGNaIzmQyY90oWqP5J3qdrbige5P02T0N0Z60=", - "last_login": "2023-07-24T14:30:51.334Z", - "is_superuser": true, - "username": "wheel", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.528Z", - "groups": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "!eYwmI7Fc6k6AF6TNLEYV9K9BzbyHJEM5EugCKKOU", - "last_login": null, - "is_superuser": false, - "username": "AnonymousUser", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:35:14.844Z", - "groups": [ - 1, - 2, - 7 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2023-01-14T12:21:51.437Z", - 
"is_superuser": true, - "username": "bco_api_user", - "first_name": "", - "last_name": "", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53Z", - "groups": [ - 1, - 2, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "user_permissions": [ - 73, - 67, - 74, - 68, - 75, - 69, - 77, - 71, - 78, - 72, - 76, - 70 - ] - } - }, - { - "model": "auth.user", - "pk": 7, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - "is_superuser": false, - "username": "test50", - "first_name": "", - "last_name": "", - "email": "test@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:50:39.093Z", - "groups": [ - 1, - 2, - 9, - 12 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 8, - "fields": { - "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab37", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [ - 1, - 2, - 10 - ], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 9, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe58", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - "groups": [ - 1, - 2, - 11 - ], - "user_permissions": [] - } - }, - { - "model": "contenttypes.contenttype", - "pk": 1, - "fields": { - "app_label": "admin", - "model": "logentry" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 2, - "fields": { - "app_label": "auth", - "model": "permission" - } - }, - { - "model": 
"contenttypes.contenttype", - "pk": 3, - "fields": { - "app_label": "auth", - "model": "group" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 4, - "fields": { - "app_label": "auth", - "model": "user" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 5, - "fields": { - "app_label": "contenttypes", - "model": "contenttype" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 6, - "fields": { - "app_label": "sessions", - "model": "session" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 7, - "fields": { - "app_label": "authtoken", - "model": "token" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 8, - "fields": { - "app_label": "authtoken", - "model": "tokenproxy" - } - }, - { - "model": "sessions.session", - "pk": "3qd5skoalb5ly4dsuqcb5esqj5j9qhzt", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGK4j:oPTKR7Co3iXf6S_I2K7tHYWcNu2MvGhg3SLWUEGUzsE", - "expire_date": "2023-01-27T13:26:29.682Z" - } - }, - { - "model": "sessions.session", - "pk": "82y6iptnatolxvvuza5tjpftnjs15ucs", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6Ctw:poy6l1agnmqGeCEBGDUHoPJmt_d7BoLfpQmxeubFgv4", - "expire_date": "2022-07-12T15:13:16.934Z" - } - }, - { - "model": "sessions.session", - "pk": "860l0sgsts303jiqqkbfbqb1b5drzber", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pEUh2:UTjS-6q1AeDxypKMT47pePYR8tWXEHAwMgYl2r2Hxyw", - "expire_date": "2023-01-22T12:22:28.297Z" - } - }, - { - "model": "sessions.session", - "pk": "a1s21bvi44c07la3bvv30q8l4brogz4a", - 
"fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1ozlTd:D4Ba4G-L6eSml3IKbwxb0QnLb51bSoCBiJxJc54lJyI", - "expire_date": "2022-12-12T21:15:45.128Z" - } - }, - { - "model": "sessions.session", - "pk": "aa07l502m3oxm2o0frkoiatmmj18v4rq", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2pCRwiAu3fcMBJhBqgaS0q6Md7dNutDtf-_9t_BhXYpfO89-InEVWpx-txjSk-sO6BHqvcnU6jJPUe6KPGiXYyN-3Q7376CEXrZ6AOcAdLIZUJtM0RoeOGcYABHOREEhW5U1XxyhwYS8ydGSMhYokvh8AdKJN94:1qNwaJ:Oh_qoPjlNq2dgzb4RfJ7b0_I2y-thVrhsRf7mqxxJEs", - "expire_date": "2023-08-07T14:30:51.336Z" - } - }, - { - "model": "sessions.session", - "pk": "aa91y2h5pktdnhqqpch0nsyv3kvmr5ff", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWk6:qMUi_RrH827urayQL-oOaSVTQWNGdKP8s6TLThYq0HM", - "expire_date": "2022-05-24T20:46:02.514Z" - } - }, - { - "model": "sessions.session", - "pk": "cjhktkzqh9exi5w3ys7fqkwvhmylt5rg", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pFco8:V4jc9iYIUck8A4V8iBucgsTCAdddFirKT2xPXWjJjPw", - "expire_date": "2023-01-25T15:14:28.952Z" - } - }, - { - "model": "sessions.session", - "pk": "cuh93ef9py0gyskhvg20jm2tlvfr67u6", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1noWs5:B78XeRd4tP8TOd87u42iEAAp5wRdfJPTX4V0yufvaIU", - "expire_date": "2022-05-24T20:54:17.321Z" - } - }, - { - "model": "sessions.session", - "pk": "e1an9x1y34jwxw2m7x6gj989vfge552q", - "fields": { - 
"session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGK1i:AVY30CitrKS74D_bd_s3QRH3bbGVuZjvLMgphfALevg", - "expire_date": "2023-01-27T13:23:22.911Z" - } - }, - { - "model": "sessions.session", - "pk": "efp83ves8qp00gawsisfoz7qk4xzne97", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGKDF:uLjZXoiMuoii6tEGiAGqeWq8PdfHBHAZNX9gj11RfC8", - "expire_date": "2023-01-27T13:35:17.624Z" - } - }, - { - "model": "sessions.session", - "pk": "h4m4295sj843xvqmfajlds3dkvuq1um0", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pGfXj:WcrCO7Drey9jEUGWdxJWuH9KiEiYrYx-7GiD0IdXPZU", - "expire_date": "2023-01-28T12:21:51.438Z" - } - }, - { - "model": "sessions.session", - "pk": "hcfujqufa0xy2alniz38tke3dqm9eqof", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1pFeup:24vcZq-yo1W12kLp1aKc78gPX7Ei9bzFG-cQ9_dwnC8", - "expire_date": "2023-01-25T17:29:31.162Z" - } - }, - { - "model": "sessions.session", - "pk": "lout2mwzatqi98yxtlt87gsfy9zrha63", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1olDuY:YIHthO6KrZicQixjS7_vlWVZK2C0zwDtrvKZK0X60YI", - "expire_date": "2022-11-02T18:35:26.147Z" - } - }, - { - "model": "sessions.session", - "pk": "mun3kmvefd3yvouew9h0i5sb5gldyyxp", - "fields": { - "session_data": 
".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1o6KH3:MzYA7DmMSpebSMTKgHH3V8cqMYWzDFvpcW80C7z_gZY", - "expire_date": "2022-07-12T23:05:37.317Z" - } - }, - { - "model": "sessions.session", - "pk": "zpzeih1sxl2o0krggucjc7ta0jiwvdwf", - "fields": { - "session_data": ".eJxVjEEOwiAQRe_C2hCoDKBL956BzAyDVA1NSrsy3l2bdKHb_977L5VwXWpau8xpzOqsvDr8boT8kLaBfMd2mzRPbZlH0puid9r1dcryvOzu30HFXr-1eLAeTg4ymWLEhwwETogKe7YmBBwiW4vHQayLxB7RYXauhAgCCOr9AennOCw:1otEuA:P-lliBdzVvpKmP6zYqrZRWQ-ueoPUC9JRAI6JViGalY", - "expire_date": "2022-11-24T21:16:10.253Z" - } - }, - { - "model": "api.bco", - "pk": 1, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.1", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:10:12.804Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. 
The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python 
script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - 
"script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", - "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:10:17.996Z" - } - }, - { - "model": "api.bco", - "pk": 2, - "fields": { - "contents": { - "object_id": 
"http://127.0.0.1:8000/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.1", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid 
(substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - 
"input_list": [ - { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - 
"access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", 
- "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:13:13.841Z" - } - }, - { - "model": "api.bco", - "pk": 3, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0", - "created": "2018-11-29T11:29:08-0500", - "modified": "2022-06-28T23:19:38.283Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": 
"Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": 
"https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": 
"Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } 
- ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:19:53.937Z" - } - }, - { - "model": "api.bco", - "pk": 4, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:47.218Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
- ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": 
{ - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", - "owner_group": "bco_drafter", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:21:56.878Z" - } - }, - { - "model": "api.bco", - "pk": 5, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-06-28T23:44:49.394Z", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": 
"SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", - "owner_group": "test_drafter", - "owner_user": "test50", - "prefix": "TEST", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:44:58.149Z" - } - }, - { - "model": "api.bco", - "pk": 6, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": 
"39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - 
"contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": "", - "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", - "owner_group": "other_drafter", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2022-06-28T23:41:49.698Z" - } - }, - { - "model": "api.bco", - "pk": 7, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", - "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.3", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:06:43.263Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "description_domain": { - "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - }, - { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" - } - ], - "version": "1.1" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3", - "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Influenza A", - "category_name": "species" - }, - { - "category_value": "nucleotide", - "category_name": "molecule" - }, - { - "category_value": "Influenza A", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:10:18.007Z" - } - }, - { - "model": "api.bco", - "pk": 8, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", - "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "description_domain": { - "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": 
"HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ], - "output_list": [ - { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": 
"HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", - "owner_group": "test50", - "owner_user": "test50", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:13:13.859Z" - } - }, - { - "model": "api.bco", - "pk": 9, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0.0", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], - "obsolete_after": "2118-09-26T14:43:43-0400", - "embargo": { - "start_time": "2000-09-26T14:43:43-0400", - "end_time": "2000-09-26T14:43:45-0400" - }, - "created": "2018-11-29T11:29:08-0500", - "modified": "2018-11-30T11:29:08-0500", - "contributors": [ - { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", - "contribution": [ - "createdBy", - "curatedBy", - "authoredBy" - ], - "orcid": "https://orcid.org/0000-0001-88238-9945" - } - ], - "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." - ], - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" - } - } - ], - "description_domain": { - "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" - ], - "xref": [ - { - "namespace": "uberon", - "name": "Uber Anatomy Ontology", - "ids": [ - "0001988" - ], - "access_time": "2016-11-30T06:46-0500" - }, - { - "namespace": "taxonomy", - "name": "Taxonomy", - "ids": [ - "9606" - ], - "access_time": "2016-11-30T06:46-0500" - } - ], - "platform": [ - "hive" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], - "input_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": 
"2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - }, - { - "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "input_list": [ - { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" - } - ], - "output_list": [ - { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" - } - } - ], - "script_driver": "shell", - "software_prerequisites": [ - { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "external_data_endpoints": [ - { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" - }, - { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" - } - ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } - }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - 
"step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": 
"<0.00005", - "false_discovery": "0.005" - } - } - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", - "owner_group": "hivelab37", - "owner_user": "hivelab37", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:19:53.938Z" - } - }, - { - "model": "api.bco", - "pk": 10, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", - "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:13.091Z", - "review": [], - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "curatedBy", - "importedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - } - ], - "license": "MIT" - }, - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
- ], - "description_domain": { - "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" - ], - "platform": [], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - }, - { - "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], - "input_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "output_list": [ - { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" - } - ], - "version": "1.0" - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - } - ], - "script_driver": "python3", - "software_prerequisites": [ - { - "name": "Python", - "version": "3.10.0", - "uri": 
{ - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" - } - } - ], - "external_data_endpoints": [ - { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/plain", - "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" - } - } - ] - }, - "parametric_domain": [], - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "SARS-CoV-2", - "category_name": "species" - }, - { - "category_value": "protein", - "category_name": "molecule" - }, - { - "category_value": "SARS-CoV-2", - "category_name": "tag" - }, - { - "category_value": "fasta", - "category_name": "file_type" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "reviewed", - "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", - "owner_group": "jdoe58", - "owner_user": "jdoe58", - "prefix": "BCO", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:21:56.879Z" - } - }, - { - "model": "api.bco", - "pk": 11, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", - "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", - "provenance_domain": { - "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", - "version": "1.0", - "created": "2017-11-12T12:30:48-0400", - "modified": "2022-06-28T23:41:33.439Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "curatedBy" - ] - } - }, - { - "status": "approved", - "date": "2017-11-12T12:30:48-0400", - "reviewer_comment": "Approved by Critical Path Institute staff.", - "reviewer": { - "name": "Kenneth Ramey", - "affiliation": "Critical Path Institute", - "email": "kramey@c-path.org", - "contribution": [ - "curatedBy" - ] - } - } - ], - "contributors": [ - { - "name": "Matthew Ezewudo", - "affiliation": "Critical Path Institute", - "email": "mezewudo@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Jamie Posie", - "affiliation": "CDC Atlanta, GA", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Anjan Purkayastha", - "affiliation": "George Washington University", - "email": "anjan.purkayastha@gmail.com", - "contribution": [ - "authoredBy", - "curatedBy" - ] - }, - { - "name": "Marco Schito", - "affiliation": "Critical Path Institute", - "email": "mschito@c-path.org", - "contribution": [ - "authoredBy" - ] - }, - { - "name": "Charles Hadley King", - "affiliation": 
"George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "authoredBy", - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "name": "ReseqTB Consortium", - "affiliation": "Critical Path Institute", - "email": "info@c-path.org", - "contribution": [ - "createdAt" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" - }, - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "description_domain": { - "keywords": [ - "Mycobacterium tuberculosis", - "Phylogenetics", - "Bacterial lineage analysis", - "Single Nucleotide Polymorphism", - "SNP" - ], - "platform": [ - "Linux" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "FastQValidator", - "description": "To verify if input file is in fastq format", - "version": "1.0.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" - } - ] - }, - { - "step_number": 2, - "name": "FastQC", - "description": "assess Quality of raw sequence reads", - "version": "0.11.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" - } - ] - }, - { - "step_number": 3, - "name": "Kraken", - "description": "Assesses species specificity of sequence reads", - "version": "0.10.5", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" - } - ] - }, - { - "step_number": 4, - "name": "BWA", - "description": "Aligns sequence reads to reference genome", - "version": "0.7.12", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ] - }, - { - "step_number": 5, - "name": "Qualimap", - "description": "Assess mapping quality of aligned reads", - "version": "2.1.1", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
- }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" - }, - { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" - } - ] - }, - { - "step_number": 6, - "name": "MarkDuplicates", - "description": "Removes duplicate reads from alignment", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ] - }, - { - "step_number": 7, - "name": "IndelRealigner", - "description": "Perfoms re-alignment around insertions and deletions", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ] - }, - { - "step_number": 8, - "name": "BaseRecalibrator", - "description": "Recalibrates base quality scores", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "name": "Variation sites file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ] - }, - { - "step_number": 9, - "name": "BuildBamIndex", - "description": "Indexes sorted BAM files for variant calling", - "version": "1.134", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" - } - ] - }, - { - "step_number": 10, - "name": "UnifiedGenotyper", - "description": "Calls variant positions in alignment", - "version": "3.4.0", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv genome reference file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - }, - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" - } - ] - }, - { - "step_number": 11, - "name": "VCFtools", - "description": "Filters raw VCF to exclude poor quality variants", - "version": "0.1.12b", - "prerequisite": [ - { - "name": "Excluded list file", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ] - }, - { - "step_number": 12, - "name": "SnpEff", - "description": "Annotates variants in VCF file", - "version": "4.1", - "prerequisite": [ - { - "name": "M. 
tuberculosis H37Rv GenBank File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ] - }, - { - "step_number": 13, - "name": "parse_annotation.py", - "description": "Parses annotated VCF to create annotation text file", - "version": "", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ] - }, - { - "step_number": 14, - "name": "lineage_parser.py", - "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", - "version": "", - "prerequisite": [ - { - "name": "Lineage Markers File", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - } - ], - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - ] - }, - { - "step_number": 15, - "name": "BEDtools", - "description": "Creates loci based coverage statistics of genome coverage", - "version": "2.17.0", - "input_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - ] - }, - { - "step_number": 16, - "name": 
"resis_parser.py", - "description": "Creates a coverage depth and width table of all loci in isolate genome", - "version": "", - "input_list": [ - { - "uri": "[path_to_genome_loci_text_file]" - }, - { - "uri": "[path_to_per_position_depth_text_file]" - } - ], - "output_list": [ - { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" - } - } - ], - "script_driver": "Python", - "software_prerequisites": [ - { - "name": "BEDtools", - "version": "2.17.0", - "uri": { - "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" - } - }, - { - "name": "Bcftools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "352908143497da0640b928248165e83212dc4298" - } - }, - { - "name": "BWA", - "version": "0.7.12", - "uri": { - "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" - } - }, - { - "name": "FastQC", - "version": "0.11.5", - "uri": { - "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "GATK", - "version": "3.4.0", - "uri": { - "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" - } - }, - { - "name": "Kraken", - "version": "0.10.5", - "uri": { - "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", - "access_time": 
"2018-10-08T18:35:33-0400" - } - }, - { - "name": "Picard", - "version": "1.134", - "uri": { - "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" - } - }, - { - "name": "Pigz", - "version": "2.3.3", - "uri": { - "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Qualimap", - "version": "2.11", - "uri": { - "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "Samtools", - "version": "1.2", - "uri": { - "uri": "https://github.com/samtools/samtools/archive/1.2.zip", - "access_time": "2018-10-08T18:35:33-0400" - } - }, - { - "name": "SnpEff", - "version": "4.1", - "uri": { - "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" - } - }, - { - "name": "Vcftools", - "version": "0.1.12b", - "uri": { - "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", - "access_time": "2018-10-08T18:35:33-0400", - "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" - } - } - ], - "external_data_endpoints": [ - { - "name": "BCOReSeqTB", - "url": "https://github.com/CPTR-ReSeqTB/UVP/" - } - ], - "environment_variables": { - "CORE": "8" - } - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" - } - }, - { - "uri": { - "filename": "Mycobacterium tuberculosis H37Rv, complete genome", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" - } - }, - { - "uri": { - "filename": "excluded_loci", - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" - } - }, - { - "uri": { - "filename": "lineage_markers", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" - } - }, - { - "uri": { - "filename": "variation sites", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" - } - }, - { - "uri": { - "filename": "ERR552106_2.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" - } - }, - { - "uri": { - "filename": "ERR552106_1.fastq.gz", - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" - } - }, - { - "mediatype": "text/csv", - "uri": { - "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" - } - } - ] - }, - "error_domain": { - "empirical_error": { - "description": [ - "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", - "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." - ], - "parameters": { - "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", - "total_sample_size": "180", - "platform": "Illumina HiSeq 2000", - "paired_end": true, - "length": "100", - "simulated": true, - "program": "ART", - "simulator_parameters": [ - { - "ss": "hs20" - }, - { - "l": "100" - }, - { - "m": "500" - }, - { - "qU": "45" - }, - { - "s": "100" - } - ], - "sequence_quality_level_parameters": { - "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", - "sequence_quality_high": { - "substitution_error_rate_R1": "0.0004", - "substitution_error_rate_R2": "0.0007", - "insertion_error_rate_R1": "0.00009", - "insertion_error_rate_R2": "0.00015", - "deletion_error_rate_R1": "0.00011", - "deletion_error_rate_R2": "0.00023", - "units": "errors per sequenced base" - }, - "sequence_quality_medium": { - "substitution_error_rate_R1": "0.004", - "substitution_error_rate_R2": "0.007", - "insertion_error_rate_R1": "0.0009", - "insertion_error_rate_R2": "0.0015", - "deletion_error_rate_R1": "0.0011", - "deletion_error_rate_R2": "0.0023", - "units": "errors per sequenced base" - }, - "sequence_quality_low": { - "substitution_error_rate_R1": "0.04", - "substitution_error_rate_R2": "0.07", - "insertion_error_rate_R1": 
"0.009", - "insertion_error_rate_R2": "0.015", - "deletion_error_rate_R1": "0.011", - "deletion_error_rate_R2": "0.023", - "units": "errors per sequenced base" - } - } - }, - "summary results": { - "sequence_quality_high": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "93.33", - "mean_AR_identification_rate": "86.72", - "Units": "Percentage" - } - }, - "sequence_quality_medium": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "90.00", - "mean_AR_identification_rate": "81.00", - "Units": "Percentage" - } - }, - "sequence_quality_low": { - "sample size": "60", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_10": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "41.67", - "mean_AR_identification_rate": "22.42", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "63.89", - "mean_AR_identification_rate": "57.14", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.46", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "36", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "66.66", - "Units": "Percentage" - } - } - }, - "detailed results": [ - { - "sequence_quality_high": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "66.67", - "mean_AR_identification_rate": "40.75", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "92.85", - "Units": "Percentage" - } - }, - "coverage_20": { - 
"sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_medium": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "58.34", - "mean_AR_identification_rate": "26.50", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "91.66", - "mean_AR_identification_rate": "78.57", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "99.40", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "100.00", - "mean_AR_identification_rate": "100.00", - "Units": "Percentage" - } - } - } - }, - { - "sequence_quality_low": { - "coverage_10": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_15": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_20": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_25": { - "sample size": "12", - "result": { - 
"lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - }, - "coverage_30": { - "sample size": "12", - "result": { - "lineage_assignment_rate": "0.00", - "mean_AR_identification_rate": "0.00", - "Units": "Percentage" - } - } - } - } - ] - }, - "algorithmic_error": { - "placeholder": "for algorithmic error domain" - } - }, - "extension_domain": [ - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", - "scm_type": "git", - "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", - "scm_path": "UVP/scripts/UVP.py" - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:41:49.719Z" - } - }, - { - "model": "api.bco", - "pk": 12, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.0", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": 
"Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
- ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google 
Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", - "owner_group": "bco_api_user", - "owner_user": "test50", - "prefix": "TEST", - "schema": 
"IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:44:58.161Z" - } - }, - { - "model": "api.bco", - "pk": 13, - "fields": { - "contents": { - "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", - "provenance_domain": { - "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", - "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", - "contributors": [ - { - "contribution": [ - "createdBy", - "authoredBy", - "contributedBy" - ], - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "orcid": "https://orcid.org/0000-0003-1409-4549" - }, - { - "contribution": [ - "curatedBy", - "contributedBy" - ], - "name": "Stephanie Singleton", - "affiliation": "The George Washington University ", - "email": "ssingleton@gwu.edu" - }, - { - "contribution": [ - "createdBy", - "curatedBy" - ], - "name": "Jonathon Keeney", - "affiliation": "The George Washington University ", - "email": "keeneyjg@gwu.edu" - }, - { - "name": "Raja Mazumder", - "contribution": [ - "curatedBy" - ], - "affiliation": "The George Washington University ", - "email": "mazumder@gwu.edu", - "orcid": "https://orcid.org/0000-0001-8823-9945" - } - ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" - }, - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "description_domain": { - "keywords": [ - "curation", - "definitions", - "ontology", - "controlled vocabulary" - ], - "pipeline_steps": [ - { - "step_number": 1, - "name": "Header download", - "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", - "prerequisite": [], - "input_list": [ - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": "SRA_ngsQC.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - }, - { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - ], - "output_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", - "filename": "headers.txt" - } - ] - }, - { - "step_number": 2, - "name": "Manual Curation", - "description": "Manual curation of headers.txt into a curated list of terms with definitions.", - "prerequisite": [], - "input_list": [ - { - "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" - } - ], - "output_list": [ - { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - ] - } - ] - }, - "execution_domain": { - "script": [ - { - "uri": { - "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", - "filename": "FINAL_v0.3_argos_dict" - } - } - ], - "script_driver": "Google Drive/Sheets", - "software_prerequisites": [ - { - "name": "Microsof Excel", - "version": "16.57", - "uri": { - "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" - } - } - ], - "external_data_endpoints": [ - { - "name": "data.ARGOSdb.org", - "url": "data.ARGOSdb.org" - }, - { - "name": "Google Drive", - "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" - } - ], - "environment_variables": {} - }, - "io_domain": { - "input_subdomain": [ - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", - "filename": 
"SRA_ngsQC.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", - "filename": "PRJNA231221_AssemblyUpdated.tsv" - } - }, - { - "uri": { - "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", - "filename": "sars-cov-2_lineage_mutations.tsv" - } - } - ], - "output_subdomain": [ - { - "mediatype": "text/tsv", - "uri": { - "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", - "access_time": "2022-02-03T13:42:44-0500", - "filename": "annotation_property_list.tsv" - } - } - ] - }, - "parametric_domain": [], - "error_domain": { - "empirical_error": {}, - "algorithmic_error": {} - }, - "extension_domain": [ - { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", - "dataset_extension": { - "additional_license": { - "data_license": "https://creativecommons.org/licenses/by/4.0/", - "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" - }, - "dataset_categories": [ - { - "category_value": "Other", - "category_name": "species" - }, - { - "category_value": "Other", - "category_name": "molecule" - }, - { - "category_value": "non-core", - "category_name": "priority" - }, - { - "category_value": "Dictionary", - "category_name": "species" - }, - { - "category_value": "tsv", - "category_name": "file_type" - }, - { - "category_value": "reviewed", - "category_name": "status" - } - ] - } - } - ] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", - "owner_group": "bco_api_user", - "owner_user": "test50", - "prefix": "TEST", - "schema": "IEEE", - "state": "PUBLISHED", - "last_update": "2022-06-28T23:44:58.161Z" - } - }, - { - "model": "api.bco", - "pk": 14, - "fields": { - "contents": { - "object_id": "", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": 
"da75a2c36dd6bf449d1f7b150197096e11c51812", - "provenance_domain": { - "name": "", - "version": "", - "license": "", - "created": "2023-09-05T18:10:23", - "modified": "2023-09-05T18:10:23.167Z", - "contributors": [ - { - "name": "", - "affiliation": "", - "email": "", - "contribution": [], - "orcid": "" - } - ] - }, - "usability_domain": [], - "description_domain": { - "pipeline_steps": [] - }, - "parametric_domain": [], - "io_domain": {}, - "execution_domain": { - "script": [], - "script_driver": "", - "software_prerequisites": [], - "external_data_endpoints": [], - "environment_variables": {} - }, - "extension_domain": [] - }, - "object_class": null, - "object_id": "http://127.0.0.1:8000/TEST_000002/DRAFT", - "owner_group": "bco_api_user", - "owner_user": "test50", - "prefix": "DRAFT", - "schema": "IEEE", - "state": "DRAFT", - "last_update": "2023-09-24T09:16:04.123Z" - } - }, - { - "model": "api.groupinfo", - "pk": 1, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Group administrators", - "expiration": null, - "group": "group_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 2, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Prefix administrators", - "expiration": null, - "group": "prefix_admins", - "max_n_members": -1, - "owner_user": "wheel" - } - }, - { - "model": "api.groupinfo", - "pk": 3, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 4, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just a test prefix.", - "expiration": null, - "group": "test_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 5, - "fields": { - "delete_members_on_group_deletion": 
false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_drafter", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.groupinfo", - "pk": 6, - "fields": { - "delete_members_on_group_deletion": false, - "description": "Just an other prefix.", - "expiration": null, - "group": "other_publisher", - "max_n_members": -1, - "owner_user": "bco_api_user" - } - }, - { - "model": "api.prefix_table", - "pk": 1, - "fields": { - "n_objects": 8, - "prefix": "BCO" - } - }, - { - "model": "api.prefix_table", - "pk": 2, - "fields": { - "n_objects": 3, - "prefix": "TEST" - } - }, - { - "model": "api.prefix_table", - "pk": 3, - "fields": { - "n_objects": 3, - "prefix": "OTHER" - } - }, - { - "model": "api.prefix", - "pk": 1, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T20:35:14.712Z", - "created_by": "bco_publisher", - "description": null, - "expires": null, - "owner_group": "bco_publisher", - "owner_user": "bco_publisher", - "prefix": "BCO" - } - }, - { - "model": "api.prefix", - "pk": 2, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:32.633Z", - "created_by": "bco_api_user", - "description": "Just a test prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "TEST" - } - }, - { - "model": "api.prefix", - "pk": 3, - "fields": { - "certifying_server": null, - "certifying_key": null, - "created": "2022-05-10T21:48:35.104Z", - "created_by": "bco_api_user", - "description": "Just an other prefix.", - "expires": null, - "owner_group": "bco_api_user", - "owner_user": "bco_api_user", - "prefix": "OTHER" - } - }, - { - "model": "authentication.authentication", - "pk": 1, - "fields": { - "username": "bco_api_user", - "auth_service": [ - { - "iss": "Reeya1", - "sub": "ReeyaGupta1" - }, - { - "iss": "string", - "sub": "string" - } - ] - } - } + { + "model": "biocompute.bco", + 
"pk": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0d692b94bc2528660470e38c78708cc06605f941", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2024-04-04T12:53:33", + "modified": "2024-04-04T12:53:33.679Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [], + "error_domain": {} + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T13:00:38.650Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + 
"contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google 
Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": 
"tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many 
disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 3, + "authorized_users": [ + 5 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "bco_api_user", + "state": "DELETE", + "score": 0, + "last_update": "2024-04-04T04:34:54Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000005/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000006/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000007/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "score": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, + "authorized_users": [] + } + }, + { + "model": "prefix.prefix", + "pk": "BCO", + "fields": { + "certifying_key": "1", + "created": "2024-03-14T13:53:59Z", + "description": "Default prefix for all BioCompute Objects", + "owner": "AnonymousUser", + "counter": 4, + "public": true + } + }, + { + "model": "prefix.prefix", + 
"pk": "NOPUB", + "fields": { + "certifying_key": "", + "created": "2024-03-26T22:22:22Z", + "description": "Test non-public prefix.", + "owner": "bco_api_user", + "counter": 0, + "public": false + } + }, + { + "model": "prefix.prefix", + "pk": "TEST", + "fields": { + "certifying_key": "12345", + "created": "2024-03-14T13:53:59Z", + "description": "Test prefix", + "owner": "tester", + "counter": 7, + "public": true + } + } ] \ No newline at end of file diff --git a/tests/fixtures/testing_bcos.py b/tests/fixtures/testing_bcos.py new file mode 100644 index 00000000..6bc3bd83 --- /dev/null +++ b/tests/fixtures/testing_bcos.py @@ -0,0 +1,1013 @@ +#!/usr/bin/env python3 +# tests.fixtures.testing_bcos.py + +from django.conf import settings + +hostname = settings.PUBLIC_HOSTNAME + +BCO_000000_DRAFT = { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.0", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). 
This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file 
for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + } + +BCO_000001_DRAFT = { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + 
"provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: 
https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + 
"output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": 
"FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + } + +NOPUB_000001_DRAFT = { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + 
"List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] +} + +TEST_000001_DRAFT = { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + 
"modified": "2022-06-28T23:44:49.394Z", + "derived_from": "", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [ + + ] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. 
default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [ + + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [ + + ], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [ + + ], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": { + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [ + + ], + "error_domain": { + "empirical_error": { + }, + "algorithmic_error": { + } + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + 
"category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] +} \ No newline at end of file diff --git a/bcodb/__init__.py b/tests/test_apis/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from bcodb/__init__.py rename to tests/test_apis/__init__.py diff --git a/tests/test_views/__init__.py b/tests/test_apis/test_api_authentication/__init__.py similarity index 100% rename from tests/test_views/__init__.py rename to tests/test_apis/test_api_authentication/__init__.py diff --git a/tests/test_apis/test_api_authentication/test_account_activate.py b/tests/test_apis/test_api_authentication/test_account_activate.py new file mode 100644 index 00000000..5a0340fd --- /dev/null +++ b/tests/test_apis/test_api_authentication/test_account_activate.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +"""Test Account Activation +Test for '200: Account has been authorized.', '404: Credentials not found.', +and '403: Requestor's credentials were rejected.' +""" + +import time +from django.test import TestCase, Client + +class ApiAccountsActivateTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self): + self.client = Client() + + def test_account_activated_success(self): + """Test for '201: Account creation request is successful.' + """ + + response = self.client.get( + '/api/accounts/activate/'\ + +'test_new_user%40testing.com/sample_temp_identifier' + ) + self.assertEqual(response.status_code, 200) + + def test_account_activated_forbidden(self): + """Test for '403: Requestor's credentials were rejected.' 
+ """ + + bad_link = "test_new_user%40testing.com/bad_temp_identifier" + response = self.client.get(f'/api/accounts/activate/{bad_link}') + self.assertEqual(response.status_code, 403) + + def test_account_activated_not_found(self): + """Test for '404: That account, {email}, was not found' + """ + + bad_link = "test22%40testing.com/sample_temp_identifier" + response = self.client.get(f'/api/accounts/activate/{bad_link}') + self.assertEqual(response.status_code, 404) + + def test_account_activated_conflict(self): + """Test for '409: CONFLICT: That account, {email}, + has already been activated.' + """ + + bad_link = "tester%40testing.com/sample_temp_identifier" + response = self.client.get(f'/api/accounts/activate/{bad_link}') + self.assertEqual(response.status_code, 409) \ No newline at end of file diff --git a/tests/test_views/test_api_accounts_describe.py b/tests/test_apis/test_api_authentication/test_account_describe.py similarity index 100% rename from tests/test_views/test_api_accounts_describe.py rename to tests/test_apis/test_api_authentication/test_account_describe.py diff --git a/tests/test_views/test_api_account_new.py b/tests/test_apis/test_api_authentication/test_account_new.py similarity index 85% rename from tests/test_views/test_api_account_new.py rename to tests/test_apis/test_api_authentication/test_account_new.py index 4f7fab88..e06a593f 100644 --- a/tests/test_views/test_api_account_new.py +++ b/tests/test_apis/test_api_authentication/test_account_new.py @@ -19,7 +19,7 @@ def test_creation_request_success(self): """ data = { - 'hostname': 'UserDB', + 'hostname': 'http://localhost:8000', 'email': 'test@gwu.edu', 'token': 'SampleToken' } @@ -27,18 +27,14 @@ def test_creation_request_success(self): response = self.client.post('/api/accounts/new/', data=data) self.assertEqual(response.status_code, 201) - # response2 = self.client.get(response.json()['activation_link']) - # self.assertEqual(response2.status_code, 201) def 
test_creation_request_success_bad_request(self): """Test for '400: Bad request format.' """ data = { 'hostname': 'UserDB', - 'email': 'test@gwu.edu', - # 'token': 'SampleToken' + 'email': 'test@gwu.edu' } - response = self.client.post('/api/accounts/new/', data=data) self.assertEqual(response.status_code, 400) @@ -49,7 +45,7 @@ def test_creation_request_conflict(self): """ data = { - 'hostname': 'UserDB', + 'hostname': 'http://localhost:8000', 'email': 'test@gwu.edu', 'token': 'SampleToken' } diff --git a/tests/test_views/test_api_auth_add.py b/tests/test_apis/test_api_authentication/test_api_auth_add.py similarity index 98% rename from tests/test_views/test_api_auth_add.py rename to tests/test_apis/test_api_authentication/test_api_auth_add.py index 9a701cb7..a7843b3b 100644 --- a/tests/test_views/test_api_auth_add.py +++ b/tests/test_apis/test_api_authentication/test_api_auth_add.py @@ -22,7 +22,7 @@ def test_credentials_created_response(self): """Add authentication is successful (200) """ - token = Token.objects.get(user=User.objects.get(username='test50')).key + token = Token.objects.get(user=User.objects.get(username='tester')).key data = {"iss": "Reeya1","sub": "ReeyaGupta1"} self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) @@ -43,7 +43,7 @@ def test_bad_request_response(self): """Bad request (400) """ - token = Token.objects.get(user=User.objects.get(username='test50')).key + token = Token.objects.get(user=User.objects.get(username='tester')).key data = {"Missing required fields"} self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/auth/add/', data=data, format='json') diff --git a/tests/test_views/test_api_auth_remove.py b/tests/test_apis/test_api_authentication/test_api_auth_remove.py similarity index 98% rename from tests/test_views/test_api_auth_remove.py rename to tests/test_apis/test_api_authentication/test_api_auth_remove.py index ce1cf4d9..150f13e5 100644 --- 
a/tests/test_views/test_api_auth_remove.py +++ b/tests/test_apis/test_api_authentication/test_api_auth_remove.py @@ -27,7 +27,6 @@ def test_success_response(self): self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/auth/remove/', data=data, format='json') - print(response.json()) self.assertEqual(response.status_code, 200) def test_bad_authentication(self): diff --git a/tests/test_views/test_api_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py similarity index 89% rename from tests/test_views/test_api_auth_reset_token.py rename to tests/test_apis/test_api_authentication/test_api_auth_reset_token.py index 991541ff..8ff77b20 100644 --- a/tests/test_views/test_api_auth_reset_token.py +++ b/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py @@ -19,16 +19,15 @@ def test_reset_successful(self): """Token reset is successful. 200 """ - token = Token.objects.get(user=User.objects.get(username='test50')).key + token = Token.objects.get(user=User.objects.get(username='tester')).key self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/auth/reset_token/') self.assertEqual(response.status_code, 200) def test_invalid_token(self): - """Inclid token. 403 + """Invalid token. 
403 """ - # token = Token.objects.get(user=User.objects.get(username='test50')).key token = 'this-is-an-invalid-token' self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/auth/reset_token/') diff --git a/tests/test_apis/test_api_authentication/test_auth_add.py b/tests/test_apis/test_api_authentication/test_auth_add.py new file mode 100644 index 00000000..a7843b3b --- /dev/null +++ b/tests/test_apis/test_api_authentication/test_auth_add.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +"""Add Authentication +Tests for 'New authentication credentials added to existing object' (200), +'Authentication credentials were created and added' (201), 'Bad request' (400), +'That object already exists for this account' (409) +""" + +from django.test import TestCase, Client +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from authentication.models import Authentication + +class AuthenticationTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self): + self.client = APIClient() + + def test_credentials_created_response(self): + """Add authentication is successful (200) + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data) + self.assertEqual(response.status_code, 201) + + def test_credentials_added(self): + """New authentication credentials added to existing object (200) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "new","sub": "new One"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + self.assertEqual(response.status_code, 200) + + def test_bad_request_response(self): + """Bad 
request (400) + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + data = {"Missing required fields"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + self.assertEqual(response.status_code, 400) + + def test_object_already_exists_response(self): + """That object already exists for this account (409) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + self.assertEqual(response.status_code, 409) diff --git a/tests/test_apis/test_api_authentication/test_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_auth_reset_token.py new file mode 100644 index 00000000..8ff77b20 --- /dev/null +++ b/tests/test_apis/test_api_authentication/test_auth_reset_token.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +"""Reset Token +Tests for 'Token reset is successful.' 200, and 'Bad request.', 400. +""" + +from django.test import TestCase, Client +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User + +class ResetTokenTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self) -> None: + self.client = APIClient() + + def test_reset_successful(self): + """Token reset is successful. 200 + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/reset_token/') + self.assertEqual(response.status_code, 200) + + def test_invalid_token(self): + """Invalid token. 
403 + """ + + token = 'this-is-an-invalid-token' + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/reset_token/') + self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_api_authentication/testi_auth_remove.py b/tests/test_apis/test_api_authentication/testi_auth_remove.py new file mode 100644 index 00000000..150f13e5 --- /dev/null +++ b/tests/test_apis/test_api_authentication/testi_auth_remove.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +"""Remove Authentication +Tests for 'Remove authentication is successful.` (200), 'Authentication +failed.' (403), and 'That object does not exist for this account.' (404) +""" + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase + +class AuthenticationRemovetestcase(APITestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self): + self.client = APIClient() + + def test_success_response(self): + """Remove authentication is successful. (200) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data, format='json') + self.assertEqual(response.status_code, 200) + + def test_bad_authentication(self): + """Authentication failed. 403 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data) + self.assertEqual(response.status_code, 403) + + def test_object_already_exists_response(self): + """That object does not exist for this account. 
404 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "Reeya2","sub": "ReeyaGupta2"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data) + self.assertEqual(response.status_code, 404) diff --git a/api/validation_definitions/uri_external b/tests/test_apis/test_api_prefix/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from api/validation_definitions/uri_external rename to tests/test_apis/test_api_prefix/__init__.py diff --git a/tests/test_views/test_api_prefixes_create.py b/tests/test_apis/test_api_prefix/test_prefixes_create.py similarity index 67% rename from tests/test_views/test_api_prefixes_create.py rename to tests/test_apis/test_api_prefix/test_prefixes_create.py index 5741a497..702422eb 100644 --- a/tests/test_views/test_api_prefixes_create.py +++ b/tests/test_apis/test_api_prefix/test_prefixes_create.py @@ -28,18 +28,21 @@ class CreatePrefixeTestCase(APITestCase): def setUp(self): self.client= APIClient() - - def test_create_prefix_success(self): - """The prefix was successfully created. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = { + self.data = [{ + "prefix": "test1", + "description": "Test prefix description.", + "public": "true" + }, + { + "prefix": "test2", + "description": "Test prefix description.", + "public": "true" + }] + + self.legacy_data = { "POST_api_prefixes_create": [ { - - - "owner_group": "test_drafter", + "owner_group": "bco_publisher", "owner_user": "bco_api_user", "prefixes": [ { @@ -51,34 +54,25 @@ def test_create_prefix_success(self): ] } + def test_create_prefix_success(self): + """The prefix was successfully created. 
201 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/prefixes/create/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_create_prefix_bad_request(self): - """Tests for 'Some or all prefix creations failed. 207.' - 201: The prefix * was successfully created. - 400: Bad Request. The expiration date * is not valid - 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - 403: Forbidden. User does not have permission to perform this action. - 404: Not Found. The user * was not found on the server. - 409: Conflict. The prefix the requestor is attempting to create already exists. + legacy_response = self.client.post('/api/prefixes/create/', data=self.legacy_data, format='json') + response = self.client.post('/api/prefixes/create/', data=self.data, format='json') + self.assertEqual(legacy_response.status_code, 201) + self.assertEqual(response.status_code, 201) + + def test_create_multi_status(self): + """Tests for 'Some prefix creations failed. 207.' 
""" token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key data = { "POST_api_prefixes_create": [ - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Invalid expiration date.", - "expiration_date": "2023-08-22T09:27:49-0400", - "prefix": "testR" - } - ] - }, { "owner_group": "test_drafter", "owner_user": "bco_api_user", @@ -106,7 +100,7 @@ def test_create_prefix_bad_request(self): "prefixes": [ { "description": "Just a test prefix.", - "prefix": "testR" + "prefix": "test2" }, ] @@ -117,7 +111,7 @@ def test_create_prefix_bad_request(self): "prefixes": [ { "description": "Just a test prefix.", - "prefix": "other" + "prefix": "test" } ] } @@ -126,24 +120,14 @@ def test_create_prefix_bad_request(self): self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/prefixes/create/', data=data, format='json') - # 201: The prefix * was successfully created. - self.assertEqual(response.data[3]['status_code'], "201") - - # 400: Bad Request. The expiration date * is not valid - self.assertIn("not valid either because it does not match the required format 'YYYY-MM-DD-HH-MM-SS'", response.data[0]['message']) - + self.assertEqual(response.data[2]['status_code'], 201) + # 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - self.assertIn('does not follow the naming rules for a prefix.', response.data[1]['message']) - - # TODO => 403: Forbidden. User does not have permission to perform this action. - # This would require testing an instance where the prefix admins was enforced... - - # 404: Not Found. The user * was not found on the server. - self.assertIn('was not found on the server.', response.data[2]['message']) + self.assertIn('prefix', response.data[0]['data']) # 409: Conflict. The prefix the requestor is attempting to create already exists. 
- self.assertIn('has already been created on this server.', response.data[4]['message']) + self.assertIn('prefix_name', response.data[3]['data']) self.assertEqual(response.status_code, 207) @@ -154,8 +138,6 @@ def test_create_prefix_unauthorized(self): data = { "POST_api_prefixes_create": [ { - - "owner_group": "test_drafter", "owner_user": "bco_api_user", "prefixes": [ diff --git a/tests/test_apis/test_api_prefix/test_prefixes_modify.py b/tests/test_apis/test_api_prefix/test_prefixes_modify.py new file mode 100644 index 00000000..26c014b9 --- /dev/null +++ b/tests/test_apis/test_api_prefix/test_prefixes_modify.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +"""Bulk Create Prefixes +Tests for 'All prefixes were successfully created. 200', 'Some prefix +modifications failed. 207', '400: All modifications failed', and 'Unauthorized. Authentication credentials were +not provided. 401' + +For the 207 response Each object submitted will have it's own response object +with it's own status code and message. These are as follows: + 201: The prefix * was successfully created. + 400: Bad Request. The expiration date * is not valid. + 400: Bad Request. The prefix * does not follow the naming rules for a prefix. + 403: Forbidden. User does not have permission to perform this action. + 404: Not Found. The user * was not found on the server. + 409: Conflict. The prefix the requestor is attempting to create already exists. + """ + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase +from django.contrib.auth.models import Group + +class CreatePrefixeTestCase(APITestCase): + fixtures=['tests/fixtures/test_data'] + + def setUp(self): + + self.client= APIClient() + self.data = [{ + "prefix": "test", + "description": "Test prefix description." 
+ }] + + self.legacy_data = { + "POST_api_prefixes_modify": [ + { + "owner_group": "bco_publisher", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test modification for prefix.", + "prefix": "Test" + } + ] + } + ] + } + + # def test_modify_prefix_success(self): + # """The prefix was successfully modified. 200 + # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # legacy_response = self.client.post('/api/prefixes/modify/', data=self.legacy_data, format='json') + # response = self.client.post('/api/prefixes/modify/', data=self.data, format='json') + # self.assertEqual(legacy_response.status_code, 200) + # self.assertEqual(response.status_code, 200) + + # def test_modify_multi_status(self): + # """Tests for 'Some prefix modifications failed. 207.' + # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + # data = { + # "POST_api_prefixes_modify": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Invalid prefix naming.", + # "expiration_date": "null", + # "prefix": "invalid-prefix" + # } + # ] + # }, + # { + # "owner_group": "does_not_exist", + # "owner_user": "does_not_exist", + # "prefixes": [ + # { + # "description": "Invalid owner.", + # "prefix": "testR" + # } + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix update.", + # "prefix": "test" + # }, + + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "BCO" + # } + # ] + # } + # ] + # } + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # response = self.client.post('/api/prefixes/modify/', data=data, format='json') + # # 201: The prefix * was successfully 
created. + # self.assertEqual(response.data[2]['TEST']['status_code'], 200) + + # # 400: Bad Request. The prefix * does not exist. + # self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) + # # 404: Not Found. The user * was not found on the server. + + # # 409: Conflict. The prefix the requestor is attempting to create already exists. + # self.assertIn('permissions', response.data[3]['BCO']['message']) + + # self.assertEqual(response.status_code, 207) + + # def test_create_prefix_unauthorized(self): + # """Unauthorized. Authentication credentials were not provided. 401 + # """ + + # data = { + # "POST_api_prefixes_create": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "testR" + # } + # ] + # } + # ] + # } + + # response = self.client.post('/api/prefixes/create/', data=data, format='json') + # self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_biocompute/__init__.py b/tests/test_apis/test_biocompute/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_apis/test_biocompute/test_objects_drafts_create.py b/tests/test_apis/test_biocompute/test_objects_drafts_create.py new file mode 100644 index 00000000..416b3bef --- /dev/null +++ b/tests/test_apis/test_biocompute/test_objects_drafts_create.py @@ -0,0 +1,135 @@ + +#!/usr/bin/env python3 + +"""Objects/Drafts_create +Tests for 'Creation of BCO draft is successful.' 
(200), +returns 207, 403 (needs to be reviewed) +""" + + +import json +from django.test import TestCase +from django.conf import settings +from django.contrib.auth.models import User +from rest_framework.authtoken.models import Token +from rest_framework.test import APIClient +from tests.fixtures.testing_bcos import BCO_000001_DRAFT, NOPUB_000001_DRAFT + + +HOSTNAME = settings.PUBLIC_HOSTNAME + +class BcoDraftCreateTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + def setUp(self): + self.client = APIClient() + + self.token = Token.objects.get(user=User.objects.get(username="tester")) + + self.legacy_data = { + "POST_api_objects_draft_create": [ + { + "prefix": "NOPUB", + "owner_group": "tester", + "object_id": f"{HOSTNAME}/NOPUB_000002/DRAFT", + "schema": "IEEE", + "contents": NOPUB_000001_DRAFT + } + ] + } + + self.data = [ + { + "prefix": "BCO", + "authorized_users": ["hivelab"], + "contents": BCO_000001_DRAFT + }, + { + "object_id": f"{HOSTNAME}/TEST_000003/DRAFT", + "prefix": "TEST", + "contents": { + "object_id": f"{HOSTNAME}/TEST_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + } + ] + + def test_legacy_successful_creation(self): + """200: Creation of BCO drafts is successful. + """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', self.legacy_data, format='json') + self.assertEqual(response.status_code, 200) + + def test_successful_creation(self): + """200: Creation of BCO drafts is successful. 
+ """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', self.data, format='json') + self.assertEqual(response.status_code, 200) + + def test_partial_failure(self): + '''Test case for partial failure (response code 300) + Returns 207(Multi status) instead of 300(Partial faliure)''' + data = { + 'POST_api_objects_draft_create': [ + { + 'prefix': 'BCO', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': { + "object_id": f"{HOSTNAME}/BCO_000005", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + }, + { + 'prefix': 'Reeyaa', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': {} + } + ] + } + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 207) + + def test_bad_request(self): + '''Test case for bad request (response code 400) + Gives 403 forbidden request instead of 400''' + data = [ + { + "object_id": f"{HOSTNAME}/TEST_000001", + # "prefix": "TEST", + "contents": { + "object_id": f"{HOSTNAME}/TEST_000001", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + } + } + ] + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 400) + + def test_invalid_token(self): + '''Test case for invalid token (response code 403) + Setting authentication token to an invalid value''' + + data = { + 'POST_api_objects_draft_create': [ + { + 'prefix': 'BCO', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': {} + }, + + ] + } + 
self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_biocompute/test_objects_drafts_modify.py b/tests/test_apis/test_biocompute/test_objects_drafts_modify.py new file mode 100644 index 00000000..b649b3ca --- /dev/null +++ b/tests/test_apis/test_biocompute/test_objects_drafts_modify.py @@ -0,0 +1,124 @@ + +#!/usr/bin/env python3 + +"""Objects/Drafts_modify +Tests for 'Modification of BCO draft is successful.' (200), +returns 207, 400, 403 (needs to be reviewed) +""" + + +import json +from django.test import TestCase +from django.contrib.auth.models import User +from rest_framework.authtoken.models import Token +from rest_framework.test import APIClient +from tests.fixtures.testing_bcos import NOPUB_000001_DRAFT, BCO_000000_DRAFT, BCO_000001_DRAFT + +class BcoDraftModifyTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + def setUp(self): + self.client = APIClient() + + self.token = Token.objects.get(user=User.objects.get(username="tester")) + + self.legacy_data = { + "POST_api_objects_drafts_modify": [ + { + # "prefix": "NOPUB", + # "owner_group": "tester", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + # "schema": "IEEE", + "contents": NOPUB_000001_DRAFT + } + ] + } + + self.data = [ + { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "prefix": "BCO", + "authorized_users": ["hivelab"], + "contents": NOPUB_000001_DRAFT + }, + { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "prefix": "TEST", + "authorized_users": ["tester"], + "contents": BCO_000000_DRAFT + } + ] + + # def test_legacy_successful_modification(self): + # """200: Modification of BCO drafts is successful. 
+ # """ + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + # response = self.client.post('/api/objects/drafts/modify/', self.legacy_data, format='json') + # self.assertEqual(response.status_code, 200) + + def test_successful_modification(self): + """200: Modification of BCO drafts is successful. + """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/modify/', self.data, format='json') + self.assertEqual(response.status_code, 200) + + def test_partial_failure(self): + '''Test case for partial failure (response code 300) + Returns 207(Multi status) instead of 300(Partial faliure)''' + data = { + 'POST_api_objects_drafts_modify': [ + { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "prefix": "TEST", + "authorized_users": ["tester"], + "contents": BCO_000000_DRAFT + }, + { + 'prefix': 'Tianyi', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': BCO_000001_DRAFT + } + ] + } + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/modify/', data=data, format='json') + self.assertEqual(response.status_code, 207) + + def test_bad_request(self): + '''Test case for bad request (response code 400) + Gives 403 forbidden request instead of 400''' + data = [ + { + "object_id": "http://127.0.0.1:8000/TEST_000001", + "contents": { + "object_id": "https://biocomputeobject.org/TEST_000001", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + } + } + ] + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/modify/', data=data, format='json') + self.assertEqual(response.status_code, 400) + + def test_invalid_token(self): + '''Test case for invalid token (response code 403) + Setting authentication token to an invalid 
value''' + + data = { + 'POST_api_objects_drafts_modify': [ + { + 'prefix': 'BCO', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': BCO_000000_DRAFT + }, + + ] + } + self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') + response = self.client.post('/api/objects/drafts/modify/', data=data, format='json') + self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_biocompute/test_objects_drafts_publish.py b/tests/test_apis/test_biocompute/test_objects_drafts_publish.py new file mode 100644 index 00000000..67b0672e --- /dev/null +++ b/tests/test_apis/test_biocompute/test_objects_drafts_publish.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +"""Tests for DraftsPublishApi [Bulk Enabled] + +DraftsPublishApi: +- checks for legacy submission +- for each object: + - `user_can_publish_bco`: + - checks for published_object_id and makes sure it does not exist + - checks that DRAFT exists + - if published_object_id in request, then checks that published_object_id version matches BCO version + - else checks that draft object_id + version does not exist + - checks if user can publish with prefix of BCO + : `returns DRAFT object` if every check is passed + - `parse_and_validate`: validates BCO. If errors then rejected. 
+ - `publish_draft`: + - copies draft, assignes new ID and status to the copy + - updates the "last_update" field in Django and the BCOs "modified" field + - generates ETag + - saves published object + - if "delete_draft" is true then deletes draft +""" + +import json +from django.test import TestCase +from django.contrib.auth.models import User +from rest_framework.authtoken.models import Token +from rest_framework.test import APIClient + +class BcoDraftPublishTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + def setUp(self): + self.client = APIClient() + + self.token = Token.objects.get(user=User.objects.get(username="tester")) + + self.legacy_data = { + "POST_api_objects_drafts_publish": [ + { + "prefix": "NOPUB", + "owner_group": "tester", + "draft_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "schema": "IEEE", + "delete_draft":"false", + } + ] + } + + self.data = [ + { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "published_object_id": "http://127.0.0.1:8000/BCO_000001/1.1", + "prefix": "BCO", + }, + { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + } + ] + + def test_legacy_successful_publish(self): + """200: Publish of BCO drafts is successful. + """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/publish/', self.legacy_data, format='json') + self.assertEqual(response.status_code, 200) + + def test_successful_publish(self): + """200: publish of BCO drafts is successful. 
+ """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/publish/', self.data, format='json') + self.assertEqual(response.status_code, 200) + + def test_partial_failure(self): + """Test case for partial failure (response code 207) + Returns 207(Multi status)""" + + data = [ + { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "published_object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "prefix": "BCO", + }, + { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + } + ] + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/publish/', data=data, format='json') + self.assertEqual(response.status_code, 207) + diff --git a/tests/test_database.py b/tests/test_database.py deleted file mode 100644 index 859a25d9..00000000 --- a/tests/test_database.py +++ /dev/null @@ -1,45 +0,0 @@ - -from django.test import TestCase -from django.db import connection -from django.conf import settings -import os -import sys - -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -''' - the test confirms that the default database configuration is correctly - set to use SQLite3, as specified in the settings.py file. - -''' - -class DatabaseSettingsTestCase(TestCase): - def test_default_database_configuration(self): - # Retrieve the default database configuration from settings - database_config = settings.DATABASES.get("default", {}) - - # Assert that the database engine is set to SQLite3 - self.assertEqual(database_config["ENGINE"], "django.db.backends.sqlite3") - -''' - This test checks that the database is properly configured and a connection can be established. - identifyies any issues with the database configuration or connectivity. 
- -''' - -class DatabaseConnectionTestCase(TestCase): - def test_database_connection(self): - #test the database connection - with connection.cursor() as cursor: - cursor.execute("SELECT 1") - - #get the result of the query - result = cursor.fetchone() - - # Assert - self.assertEqual(result[0], 1) - - - def test_database_name(self): - assert settings.DATABASES['default']['NAME'] is not None \ No newline at end of file diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py deleted file mode 100644 index af1c3d9c..00000000 --- a/tests/test_fixtures.py +++ /dev/null @@ -1,119 +0,0 @@ -from django.test import TestCase -from django.db import connections -from pathlib import Path -from django.conf import settings -import shutil -import tempfile - -class DatabaseTestCase(TestCase): - def setUp(self): - # Creating a temporary directory for the databasSSe - self.tmp_dir = tempfile.mkdtemp() - tmp_db_path = Path(self.tmp_dir) / 'db.sqlite3' - - # Copying the dev database to the temporary directory - shutil.copy2(Path(settings.BASE_DIR) / 'admin_only' / 'db.sqlite3.dev', tmp_db_path) - - # Updating the database settings to use the temporary database - settings.DATABASES['default']['NAME'] = str(tmp_db_path) - - # connection to the temporary database - self.connection = connections['default'] - - # def tearDown(self): - # # Cleanup: Delete the temporary directory and database - # shutil.rmtree(self.tmp_dir) - - def test_table_data(self): - - # SQL query to retrieve all table names from the temporary database - with self.connection.cursor() as cursor: - #sqlite_master= special table in SQLite. 
contains all metadata about db - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - table_names = cursor.fetchall() - - # Print the table names for comparison with the terminal output - print('Table Names:', table_names) - - #self.assertEqual(table_names, expected_data) - - - # sql query to retrieve table data from the temporary database - with self.connection.cursor() as cursor: - cursor.execute('SELECT * FROM authtoken_token;') - table_data = cursor.fetchall() - - print('Fetched Data:', table_data) - - #self.assertEqual(table_data, expected_data) - - - - - - - - - - - - - - - - - - - - - - - - - -''' -def test_my_database_function(dev_database): - # Asserting that the temporary database path is returned by the fixture - assert dev_database.endswith('db.sqlite3.dev') - - # Asserting that the temporary database file exists - assert Path(dev_database).exists() - - # Asserting that database settings have been updated - assert settings.DATABASES['default']['NAME'] == dev_database - - -## -def test_access_database_content(dev_database): - # Perform necessary database operations using the dev_database fixture - # For example, retrieve data from a specific table or perform a query - - # Connectibng to the SQLite database - conn = sqlite3.connect(dev_database) - cursor = conn.cursor() - - # SQL query to retrieve data from the api_bco table - cursor.execute("SELECT * FROM api_bco") - - - rows = cursor.fetchall() - - # Printing data - for row in rows: - print(row) - - # Close the database connection - cursor.close() - conn.close() - - - - # Assert that the SQL query returned the expected results - assert len(results) > 0 - assert results[0][0] == 'expected_value' - - - return 'Test completed successfully' - -''' - diff --git a/tests/test_models b/tests/test_models deleted file mode 100644 index c31f4fd8..00000000 --- a/tests/test_models +++ /dev/null @@ -1,33 +0,0 @@ -from django.test import TestCase -from django.db import models -from 
django.db.migrations.executor import MigrationExecutor -from django.db.migrations.loader import MigrationLoader -from django.db import connection - - -class DatabaseModelsTestCase(TestCase): - def test_model_definition(self): - #loading migrations - loader = MigrationLoader(connection=connection) - migrations = loader.graph.leaf_nodes(loader.graph.ancestors(loader.graph.leaf_nodes('bco_api'))) - - #all migrations applied??? - executor = MigrationExecutor(connection) - executor.migrate('bco_api', migrations) - - #model classes - models_module = __import__('bco_api.api.model', fromlist=['*']) - model_classes = [getattr(models_module, name) for name in dir(models_module) if - isinstance(getattr(models_module, name), type(models.Model))] - - # can models be used to create database tables??check./ and cretaing temp table to validate model - for model_class in model_classes: - - with connection.schema_editor() as schema_editor: - schema_editor.create_model(model_class) - - # Assertting table exists in the database - self.assertIn(model_class._meta.db_table, connection.introspection.table_names()) - - #del the temp table - schema_editor.delete_model(model_class) diff --git a/tests/test_views/test_api_account_activate.py b/tests/test_views/test_api_account_activate.py deleted file mode 100644 index 8df258fe..00000000 --- a/tests/test_views/test_api_account_activate.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 - -"""Test Account Activation -Test for '200: Account has been authorized.', '404: Credentials not found.', -and '403: Requestor's credentials were rejected.' 
-""" - -import time -from django.test import TestCase, Client - -class ApiAccountsActivateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = Client() - data = { - 'hostname': 'UserDB', - 'email': 'test@gwu.edu', - 'token': 'SampleToken' - } - - self.initial_response = self.client.post('/api/accounts/new/', data=data).json() - - def test_account_activated_forbidden(self): - """Test for '403: Requestor's credentials were rejected.' - """ - - bad_link = self.initial_response['activation_link']+ "bad_content" - response = self.client.get(bad_link) - self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_views/test_api_groups_group_info.py b/tests/test_views/test_api_groups_group_info.py deleted file mode 100644 index 4af7599b..00000000 --- a/tests/test_views/test_api_groups_group_info.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 - -"""Group info -Tests for 'Success. Group permissions returned (200)', 'Forbidden. Invalid -token. (403)', Forbidden response (400) -""" - - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from api.model.groups import GroupInfo, Group - - -class GroupInfoAPITestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_success_response(self): - """Tests for 'Success. 
Group permissions returned (200)' - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_groups_info": { - "names": [ - "bco_drafter", "bco_publisher", "test50", "test_drafter", "other_drafter" - ] - } - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/groups/group_info/', data=data, format='json') - - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()), 5) - - def test_unauthorized(self): - """Tests for 'Forbidden. Invalid token. (403)' - """ - - data = { - "POST_api_groups_info": { - "names": [ - "bco_drafter", "bco_publisher", "test50", "test_drafter" - ] - } - } - - response = self.client.post('/api/groups/group_info/', data=data, format='json') - self.assertEqual(response.status_code, 403) - - def test_unauthorized_response(self): - """ - """ - - data = { - "POST_api_groups_info": { - "names": [ - "bco_drafter", "bco_publisher", "test50", "test_drafter" - ] - } - } - - self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') - response = self.client.post('/api/groups/group_info/', data=data, format='json') - self.assertEqual(response.status_code, 403) - - def test_bad_request(self): - """ - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_groups_info": { - "bad_names": { - "bco_drafter", "bco_publisher", "test50", "test_drafter" - } - } - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/groups/group_info/', data=data, format='json') - self.assertEqual(response.status_code, 400) \ No newline at end of file diff --git a/tests/test_views/test_api_groups_modify.py b/tests/test_views/test_api_groups_modify.py deleted file mode 100644 index abd7d829..00000000 --- a/tests/test_views/test_api_groups_modify.py +++ /dev/null @@ -1,90 +0,0 @@ - -#!/usr/bin/env python3 - -"""Group info -Tests for 'Authorization is 
successful. Group permissions returned' (200), -Forbidden response (400) -""" - - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User, Group -from api.models import BCO -from api.model.groups import GroupInfo - - -class GroupInfoAPITestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_success_response(self): - """Successful request with authentication data - """ - - old_name = "test_drafter" - new_name = "new_name" - old_bco_counts = len(BCO.objects.filter(owner_group=old_name)) - old_group_counts = len(Group.objects.filter(name=old_name)) - old_groupInfo_counts = len(GroupInfo.objects.filter(group=old_name)) - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_groups_modify": [ - { - "name": old_name, - "actions": { - "rename": new_name - } - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/groups/modify/', data=data, format='json') - new_bco_counts = len(BCO.objects.filter(owner_group=new_name)) - new_group_counts = len(Group.objects.filter(name=new_name)) - new_groupInfo_counts = len(GroupInfo.objects.filter(group=new_name)) - self.assertEqual(response.status_code, 200) - self.assertEqual(new_bco_counts, old_bco_counts) - self.assertEqual(new_group_counts, old_group_counts) - self.assertEqual(new_groupInfo_counts, old_groupInfo_counts) - - def test_bad_request_response(self): - """Bad request: Authorization is not provided in the request headers - Gives 403 instead of 400 - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_groups_info": { - "names": ["anon", "wheel"] - } - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/groups/modify/', data=data, 
format='json') - self.assertEqual(response.status_code, 400) - - def test_unauthorized_response(self): - # Unauthorized: Authentication credentials were not valid - #Gives 403 instead of 401 - - data = { - "POST_api_groups_modify": [ - { - "name": "old_name", - "actions": { - "rename": "new_name" - } - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') - response = self.client.post('/api/groups/group_info/', data=data, format='json') - self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_views/test_api_objects.py b/tests/test_views/test_api_objects.py deleted file mode 100644 index 780f2257..00000000 --- a/tests/test_views/test_api_objects.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 - -"""Search the BCODB -Tests for endpoint for use of query string based search. - Four parameters are defined by this API: - 1. contents: Search in the contents of the BCO - 2. prefix: BCO Prefix to search - 3. owner_user: Search by BCO owner - 4. object_id: BCO object_id to search for -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase - -class ObjectsTestCase(APITestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_search_contents(self): - """Search successfull. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('http://localhost:8000/api/objects/?contents=review') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 13) - - def test_search_prefix(self): - """Search successfull. 
200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('/api/objects/?prefix=TEST') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 3) - - def test_search_owner_user(self): - """Search successfull. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('http://localhost:8000/api/objects/?owner_user=test50') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 5) - - def test_search_object_id(self): - """Search successfull. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('http://localhost:8000/api/objects/?object_id=DRAFT') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 6) - - def test_search_all(self): - """Search successfull. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('/api/objects/?') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 13) - - def test_search_multi_value(self): - """Search successfull. 
200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('http://localhost:8000/api/objects/?contents=HCV&contents=DRAFT') - self.assertEqual(response.status_code, 200) - self.assertEqual(len(response.json()[0]), 2) \ No newline at end of file diff --git a/tests/test_views/test_api_objects_drafts_create.py b/tests/test_views/test_api_objects_drafts_create.py deleted file mode 100644 index 1808087b..00000000 --- a/tests/test_views/test_api_objects_drafts_create.py +++ /dev/null @@ -1,111 +0,0 @@ - -#!/usr/bin/env python3 - -"""Objects/Drafts_create -Tests for 'Creation of BCO draft is successful.' (200), -returns 207, 403 (needs to be reviewed) -""" - - -import json -from django.test import TestCase -from django.contrib.auth.models import User -from rest_framework.authtoken.models import Token -from rest_framework.test import APIClient - -class BcoDraftCreateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - # Checking if the user 'bco_api_user' already exists - try: - self.user = User.objects.get(username='bco_api_user') - except User.DoesNotExist: - self.user = User.objects.create_user(username='bco_api_user') - - # Checking if user already has token, if not then creating one - if not Token.objects.filter(user=self.user).exists(): - self.token = Token.objects.create(user=self.user) - else: - self.token = Token.objects.get(user=self.user) - - def test_successful_creation(self): - """200: Creation of BCO draft is successful. 
- """ - - - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data, format='json') - self.assertEqual(response.status_code, 200) - - def test_partial_failure(self): - # Test case for partial failure (response code 300) - ##Returns 207(Multi status) instead of 300(Partial faliure) - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - { - 'prefix': 'Reeyaa', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 207) - - def test_bad_request(self): - # Test case for bad request (response code 400) - #Gives 403 forbidden request instead of 400 - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - - ] - } - #self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) - - def test_invalid_token(self): - # Test case for invalid token (response code 403) - # Setting authentication token to an invalid value - - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) diff --git 
a/tests/test_views/test_api_objects_drafts_modify.py b/tests/test_views/test_api_objects_drafts_modify.py deleted file mode 100644 index 1e1aa8d2..00000000 --- a/tests/test_views/test_api_objects_drafts_modify.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python3 - -"""Bulk Modify BCO Draft -Tests for 200: 'All modifications of BCO drafts are successful.', -401: 'Unauthorized. Authentication credentials were not provided.', -400: 'Bad request.', 403: 'Invalid token.' and 207: 'Some or all BCO -modifications failed. Each object submitted will have it's own response object -with it's own status code and message: - "200: Success. The object with ID <'object_id'> was" - "updated.\n" - "400: Bad request. The request could not be processed with" - "the parameters provided." - "401: Prefix unauthorized. The token provided does not" - "have draft permissions for this prefix .\n" - "404: Not Found. The object ID was not found" - "on the server.\n" - "409: Conflict. The provided object_id does" - "not match the saved draft object_id ." - "Once a draft is created you can not change the object" - "id.\n", -""" - -import json -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase -from api.models import BCO - -class ModifyBCODraftTestCase(APITestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_modify_bco_draft_success(self): - """Tests for 200: 'All modifications of BCO drafts are successful.' 
- """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - bco_0 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000000/DRAFT').contents - bco_1 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000001/DRAFT').contents - - bco_0['provenance_domain']['version'] = '99.9' - bco_1['provenance_domain']['version'] = '88.8' - - submission = { - "POST_api_objects_drafts_modify": [ - { - "object_id": bco_0['object_id'], - "contents": bco_0 - }, - { - "object_id": bco_1['object_id'], - "contents": bco_1 - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/modify/', data=submission, format='json') - test_case_1 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000001/DRAFT').contents - - self.assertEqual(test_case_1['provenance_domain']['version'], '88.8') - self.assertEqual(response.status_code, 200) - - def test_bulk_modification_fail(self): - """Test for 207: 'Some or all BCO modifications failed. Each object - submitted will have it's own response object with it's own status - code and message: - "200: Success. The object with ID <'object_id'> was" - "updated.\n" - "400: Bad request. The request could not be processed with" - "the parameters provided." - "401: Prefix unauthorized. The token provided does not" - "have draft permissions for this prefix .\n" - "404: Not Found. The object ID was not found" - "on the server.\n" - "409: Conflict. The provided object_id does" - "not match the saved draft object_id ." 
- "Once a draft is created you can not change the object" - "id.\n", - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - bco_0 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000000/DRAFT').contents - bco_1 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000001/DRAFT').contents - bco_3 = BCO.objects.get(object_id='http://127.0.0.1:8000/OTHER_000001/DRAFT').contents - - bco_0['provenance_domain']['version'] = '88.8' - bco_1['object_id'] = 'http://127.0.0.1:8000/BCO_100000/DRAFT' - - submission = { - "POST_api_objects_drafts_modify": [ - { - "object_id": bco_0['object_id'], - "contents": bco_0 - }, - { - "object_id": 'object_id', - "contents": bco_0 - }, - { - "object_id": 'http://127.0.0.1:8000/BCO_100000/DRAFT', - "contents": bco_1 - }, - { - "object_id": 'http://127.0.0.1:8000/BCO_000000/DRAFT', - "contents": bco_1 - }, - { - "object_id": bco_3['object_id'], - "contents": bco_3 - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/modify/', data=submission, format='json') - - self.assertEqual(response.status_code, 207) - self.assertEqual(response.json()[0]['status_code'], '200') - self.assertEqual(response.json()[1]['status_code'], '400') - self.assertEqual(response.json()[2]['status_code'], '404') - self.assertEqual(response.json()[3]['status_code'], '409') - self.assertEqual(response.json()[4]['status_code'], '401') - - def test_unauthorized(self): - """Test for 401: Unauthorized. Authentication credentials were not - provided. 
- """ - bco_0 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000000/DRAFT').contents - bco_1 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000001/DRAFT').contents - - bco_0['provenance_domain']['version'] = '99.9' - bco_1['provenance_domain']['version'] = '88.8' - - submission = { - "POST_api_objects_drafts_modify": [ - { - "object_id": bco_0['object_id'], - "contents": bco_0 - }, - { - "object_id": bco_1['object_id'], - "contents": bco_1 - } - ] - } - - response = self.client.post('/api/objects/drafts/modify/', data=submission, format='json') - self.assertEqual(response.status_code, 403) - - def test_bad_request(self): - """Test for 400: Bad request. - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - bco_0 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000000/DRAFT').contents - bco_1 = BCO.objects.get(object_id='http://127.0.0.1:8000/BCO_000001/DRAFT').contents - - bco_0['provenance_domain']['version'] = '99.9' - bco_1['provenance_domain']['version'] = '88.8' - - submission = { - "POST": [ - { - "object_id": bco_0['object_id'], - "contents": bco_0 - }, - { - "object_id": bco_1['object_id'], - "contents": bco_1 - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/modify/', data=submission, format='json') - self.assertEqual(response.status_code, 400) - - def test_invalid_token(self): - """Test for 403: Invalid token. 
- """ - - submission = {} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + 'token') - response = self.client.post('/api/objects/drafts/modify/', data=submission, format='json') - self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_views/test_api_objects_drafts_publish.py b/tests/test_views/test_api_objects_drafts_publish.py deleted file mode 100644 index e7087ac0..00000000 --- a/tests/test_views/test_api_objects_drafts_publish.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 - -"""Test Bulk Publish BCOs -Tests for 'All BCO publications successful.' (200), 'Some or all publications -failed.' (207), 'Bad request.' (400), and 'Authentication credentials were not -provided.' (404) -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase, APIClient - - -class PublishDraftBCOTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - def test_publish_bco_success(self): - """All BCO publications successful (200) - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_objects_drafts_publish": [ - { - "prefix": "BCO", - "draft_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", - "delete_draft": False - }, - { - "prefix": "BCO", - "draft_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "object_id" "http://127.0.0.1:8000/BCO_000000/1.1" - "delete_draft": False - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/publish/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_publish_bco_partial_failure(self): - """Some or all publications failed (207) - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - 
"POST_api_objects_drafts_publish": [ - { - "prefix": "BCO", - "draft_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "delete_draft": False - }, - { - "prefix": "InvalidPrefix", - "draft_id": "InvalidDraftId", - "delete_draft": False - } - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/publish/', data=data, format='json') - self.assertEqual(response.status_code, 207) - - def test_publish_bco_bad_request(self): - """Bad request (400) - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_wrong_thing": [ - { - "prefix": "BCO", - #"draft_id": "InvalidID", - "delete_draft": False - } - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/objects/drafts/publish/', data=data, format='json') - self.assertEqual(response.status_code, 400) - - def test_publish_bco_invalid_token(self): - """Authentication credentials were not provided. 
(404) - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - - data = { - "POST_api_objects_drafts_publish": [ - { - "prefix": "BCO", - "draft_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", - - "delete_draft": False - - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='invalid token') - response = self.client.post('/api/objects/drafts/publish/', data=data, format='json') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_views/test_api_objects_search.py b/tests/test_views/test_api_objects_search.py deleted file mode 100644 index fe5e74ce..00000000 --- a/tests/test_views/test_api_objects_search.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -"""Objects Search -Tests for successful search (status code: 200), -prefix not found (status code: 404) -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase - -#TODO: this needs refinement -class ObjectsSearchTestCase(APITestCase): - - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - def test_search_successful(self): - """Test case for a successful search (status code: 200) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = { - "POST_api_objects_search": [ - { - "type": "prefix", - "search": "TEST" - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post("/api/objects/search/", data=data, format="json") - self.assertEqual(response.status_code, 200) - - def test_prefix_not_found(self): - """Test case for prefix not found (status code: 404) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = { - "POST_api_objects_search": [ - { - "type": "prefix", - "search": "invalidprefix" - } - ] - } - - 
self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post("/api/objects/search/", data=data, format="json") - self.assertEqual(response.status_code, 404) \ No newline at end of file diff --git a/tests/test_views/test_api_objects_validate.py b/tests/test_views/test_api_objects_validate.py deleted file mode 100644 index 1d4bf9dc..00000000 --- a/tests/test_views/test_api_objects_validate.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python3 - -"""Bulk Validate BCOs -Tests for 'Success. All BCOs are valid (200)', 'Forbidden. Invalid -token. (403)', Forbidden response (400) -""" - -import json -from api.models import BCO -from django.contrib.auth.models import User -from django.test import TestCase -from rest_framework.test import APIClient - -class BcoValidateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - self.bco_1 = BCO.objects.filter(object_id__icontains='TEST_000001/DRAFT')[0].contents - self.bco_2 = {} - self.bco_3 = { - "object_id": "", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "da75a2c36dd6bf449d1f7b150197096e11c51812", - "provenance_domain": { - "name": "", - "version": "", - "license": "", - "created": "2023-09-05T18:10:23", - "modified": "2023-09-05T18:10:23.167Z", - "contributors": [ - { - "name": "", - "affiliation": "", - "email": "", - "contribution": [], - "orcid": "" - } - ] - }, - "usability_domain": [], - "description_domain": { - "pipeline_steps": [] - }, - "parametric_domain": [], - "io_domain": {}, - "execution_domain": { - "script": [], - "script_driver": "", - "software_prerequisites": [], - "external_data_endpoints": [], - "environment_variables": {} - }, - "extension_domain": [] - } - - - def test_successful_validation(self): - """Test case for failed validation (response code 207) - """ - - data = { - "POST_validate_bco": [ - self.bco_1 - ] - } - - response = 
self.client.post('/api/objects/validate/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_unsuccessful_validation(self): - """Test case for successful validation (response code 201) - """ - - data = { - "POST_validate_bco": [ - self.bco_1, - self.bco_2, - self.bco_3 - ] - } - - response = self.client.post('/api/objects/validate/', data=data, format='json') - # Test for successfull validation - self.assertEqual(response.json()[self.bco_1['object_id']]['number_of_errors'], 0) - # Test for failed validation: Empty object - self.assertEqual(response.json()['1']['number_of_errors'], 1) - # Test for failed validation: Blank object - self.assertEqual(response.json()['2']['number_of_errors'], 3) - self.assertEqual(response.status_code, 207) diff --git a/tests/test_views/test_api_prefixes_token.py b/tests/test_views/test_api_prefixes_token.py deleted file mode 100644 index 36adb798..00000000 --- a/tests/test_views/test_api_prefixes_token.py +++ /dev/null @@ -1,46 +0,0 @@ - -#!/usr/bin/env python3 - -"""Prefixes token -Tests for 'Successful request' (200), -'forbiddden' (403) -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase - - -class PrefixesTokenTestCase(APITestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_success_response(self): - """The available prefixes were returned. (200)""" - - token = Token.objects.get( - user=User.objects.get(username='bco_api_user') - ).key - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/prefixes/token/',data={}) - self.assertEqual(response.status_code, 200) - - - # def test_unauthorized_response(self): - # """The authorization header was not provided. 
(401)""" - - # self.client.credentials() - # response = self.client.post('/api/prefixes/token/') - # self.assertEqual(response.status_code, 401) - - def test_invalid_token(self): - """Invalid token(403)""" - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + "token") - response = self.client.post('/api/prefixes/token/') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_views/test_get_object_id_draft.py b/tests/test_views/test_get_object_id_draft.py deleted file mode 100644 index ddc4a1e3..00000000 --- a/tests/test_views/test_get_object_id_draft.py +++ /dev/null @@ -1,53 +0,0 @@ - -#!/usr/bin/env python3 - -""" Get Draft BCO - -""" - -import unittest -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User - -class GetDraftBcoTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_get_draft(self): - """Test a successful response with status code 201 - """ - token = Token.objects.get(user=User.objects.get(username='test50')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('/BCO_000000/DRAFT', format='json') - self.assertEqual(response.status_code, 200) - - def test_no_credentials(self): - """Test for '403: Authentication credentials were not provided.' - """ - - response = self.client.get('/BCO_000000/DRAFT', format='json') - self.assertEqual(response.status_code, 403) - - def test_dne(self): - """Test for '404: Not found. That draft could not be found on the - server.' - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('/BCO_000100/DRAFT', format='json') - self.assertEqual(response.status_code, 404) - - def test_unauthorized(self): - """Test for '404: Not found. 
That draft could not be found on the - server.' - """ - - token = Token.objects.get(user=User.objects.get(username='test50')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.get('/OTHER_000001/DRAFT', format='json') - self.assertEqual(response.status_code, 401) \ No newline at end of file diff --git a/tests/test_views/test_get_objectid.py b/tests/test_views/test_get_objectid.py deleted file mode 100644 index 9733b9d9..00000000 --- a/tests/test_views/test_get_objectid.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 - -"""Get ObjectID -Tests for 'Successful request with valid object id' (200), -'Request with a non-existent object ID' (404), 'unauthorized- Request without authentication credentials' (401), -'Forbidden- Request with valid object ID but unauthorized user' (403) -""" - - -from django.test import TestCase -from rest_framework.test import APIClient - -class BCOViewTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - def test_view_published_bco_success(self): - """Successful request with valid object ID - """ - - object_id = "TEST_000001" - response = self.client.get(f'/{object_id}') - self.assertEqual(response.status_code, 200) - - def test_view_published_bco_not_found(self): - # Request with a non-existent object ID - object_id = "invalid_object_id" - response = self.client.get(f'{object_id}/') - self.assertEqual(response.status_code, 404) diff --git a/tests/test_views/test_published_object_by_id.py b/tests/test_views/test_published_object_by_id.py deleted file mode 100644 index d5a599cc..00000000 --- a/tests/test_views/test_published_object_by_id.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 - -"""Root Object Id Testing -Tests for 'Object Found' (200) and 'Object Not Found'(404) -""" - -from django.test import TestCase -from rest_framework.test import APIClient - -class ObjectIdRootObjectIdTest(TestCase): - fixtures = 
['tests/fixtures/test_data'] - - def test_seccussfull_retrieval(self): - """200: Object returned. - """ - - client = APIClient() - response = self.client.get('/BCO_000001') - self.assertEqual(response.status_code, 200) - - def test_object_not_found(self): - """404: Object not found. - """ - - response = self.client.get('/BCO_001000') - self.assertEqual(response.status_code, 404) \ No newline at end of file diff --git a/token.json b/token.json deleted file mode 100644 index 50e4e407..00000000 --- a/token.json +++ /dev/null @@ -1 +0,0 @@ -[{"model": "authtoken.token", "pk": "00edb7f97f1a100c1d1b463f167908d178158b6d", "fields": {"user": 2, "created": "2023-10-11T16:46:23.774Z"}}, {"model": "authtoken.token", "pk": "627626823549f787c3ec763ff687169206626149", "fields": {"user": 4, "created": "2023-10-11T16:46:23.782Z"}}, {"model": "authtoken.token", "pk": "a8d6fb100e9f46925dda3efe46870d2bb34e03a9", "fields": {"user": 1, "created": "2023-10-11T16:46:23.765Z"}}, {"model": "authtoken.token", "pk": "b64fffb062421c1ea17fe7e0034484e494708f63", "fields": {"user": 3, "created": "2023-10-11T16:46:23.778Z"}}, {"model": "authtoken.token", "pk": "c0d204332435783b4b745eca04a6946d8c124b38", "fields": {"user": 5, "created": "2023-10-11T16:46:23.867Z"}}] \ No newline at end of file