Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: find best_detections for classifications (#631) #632

Merged
merged 3 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 39 additions & 38 deletions core/_cli/migrations/20240404131807-create-best-detections.ts.js
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this migration get run again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only in environments where it has not already run (probably none).

Original file line number Diff line number Diff line change
Expand Up @@ -56,46 +56,47 @@ module.exports = {
)
`, { transaction: t })

const jobs = await queryInterface.sequelize.query('select "id", "query_start", "query_end" from public.classifier_jobs', {
transaction: t,
type: Sequelize.QueryTypes.SELECT,
raw: true
})
// NOT USED SINCE IT'S rewritten in the next migration
// const jobs = await queryInterface.sequelize.query('select "id", "query_start", "query_end" from public.classifier_jobs', {
// transaction: t,
// type: Sequelize.QueryTypes.SELECT,
// raw: true
// })

for (const job of jobs) {
const replacements = {
classifierJobId: job.id,
perDayLimit: 10,
perStreamLimit: 10,
jobStart: job.query_start,
jobEnd: job.query_end
}
// for (const job of jobs) {
// const replacements = {
// classifierJobId: job.id,
// perDayLimit: 10,
// perStreamLimit: 10,
// jobStart: job.query_start,
// jobEnd: job.query_end
// }

await queryInterface.sequelize.query(`
INSERT INTO public.best_detections
SELECT
"detection_id", "start", "stream_id", "classifier_job_id", "confidence", "daily_ranking", "stream_ranking"
FROM (
SELECT
"id" as "detection_id", "start", "stream_id", "classifier_job_id", "confidence",
ROW_NUMBER() OVER(
PARTITION BY stream_id, date(timezone('UTC', "start"))
ORDER BY confidence DESC
) as daily_ranking,
ROW_NUMBER() OVER(
PARTITION BY stream_id
ORDER BY confidence DESC
) as stream_ranking
FROM public.detections
WHERE (start BETWEEN :jobStart AND :jobEnd) AND classifier_job_id = :classifierJobId
) as detection
WHERE daily_ranking < :perDayLimit OR stream_ranking < :perStreamLimit;
`, {
replacements,
type: Sequelize.QueryTypes.RAW,
transaction: t
})
}
// await queryInterface.sequelize.query(`
// INSERT INTO public.best_detections
// SELECT
// "detection_id", "start", "stream_id", "classifier_job_id", "confidence", "daily_ranking", "stream_ranking"
// FROM (
// SELECT
// "id" as "detection_id", "start", "stream_id", "classifier_job_id", "confidence",
// ROW_NUMBER() OVER(
// PARTITION BY stream_id, date(timezone('UTC', "start"))
// ORDER BY confidence DESC
// ) as daily_ranking,
// ROW_NUMBER() OVER(
// PARTITION BY stream_id
// ORDER BY confidence DESC
// ) as stream_ranking
// FROM public.detections
// WHERE (start BETWEEN :jobStart AND :jobEnd) AND classifier_job_id = :classifierJobId
// ) as detection
// WHERE daily_ranking < :perDayLimit OR stream_ranking < :perStreamLimit;
// `, {
// replacements,
// type: Sequelize.QueryTypes.RAW,
// transaction: t
// })
// }
veckatimest marked this conversation as resolved.
Show resolved Hide resolved
})
},
down: async (queryInterface) => {
Expand Down
143 changes: 143 additions & 0 deletions core/_cli/migrations/20240613065719-best-detections-for-species.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
'use strict'

module.exports = {
up: async (queryInterface, Sequelize) => {
return queryInterface.sequelize.transaction(async t => {
await queryInterface.sequelize.query('DELETE FROM public."best_detections"', {
transaction: t
})

await queryInterface.addColumn(
'best_detections',
'classification_id',
{
type: Sequelize.INTEGER,
allowNull: false,
references: {
model: {
tableName: 'classifications'
},
key: 'id'
}
},
{ transaction: t }
)
await queryInterface.renameColumn(
'best_detections',
'daily_ranking',
'stream_daily_ranking',
{ transaction: t }
)
await queryInterface.addColumn(
'best_detections',
'stream_classification_ranking',
{
type: Sequelize.INTEGER,
allowNull: false
},
{ transaction: t }
)
await queryInterface.addColumn(
'best_detections',
'stream_classification_daily_ranking',
{
type: Sequelize.INTEGER,
allowNull: false
},
{ transaction: t }
)

const jobs = await queryInterface.sequelize.query('select "id", "query_start", "query_end" from public.classifier_jobs', {
transaction: t,
type: Sequelize.QueryTypes.SELECT,
raw: true
})

for (const job of jobs) {
const replacements = {
classifierJobId: job.id,
dayLimit: 10,
limit: 10,
streamClassificationDayLimit: 5,
jobStart: job.query_start,
jobEnd: job.query_end
}

await queryInterface.sequelize.query(`
INSERT INTO public.best_detections
(
"detection_id",
"start", "stream_id", "classifier_job_id", "confidence", "classification_id",
"stream_ranking",
"stream_daily_ranking",
"stream_classification_ranking",
"stream_classification_daily_ranking"
)
SELECT
"detection_id",
"start", "stream_id", "classifier_job_id", "confidence", "classification_id"
"stream_ranking",
"stream_daily_ranking",
"stream_classification_ranking",
"stream_classification_daily_ranking",
FROM (
SELECT
"id" as "detection_id",
"start", "stream_id", "classifier_job_id", "confidence", "classification_id",
ROW_NUMBER() OVER(
PARTITION BY stream_id
ORDER BY confidence DESC
) as stream_ranking,
ROW_NUMBER() OVER(
PARTITION BY stream_id, date(timezone('UTC', "start"))
ORDER BY confidence DESC
) as stream_daily_ranking,
ROW_NUMBER() OVER(
PARTITION BY stream_id, classification_id
ORDER BY confidence DESC
) as stream_classification_ranking,
ROW_NUMBER() OVER(
PARTITION BY stream_id, classification_id, date(timezone('UTC', "start"))
ORDER BY confidence DESC
) as stream_classification_daily_ranking
FROM public.detections
WHERE (start BETWEEN :jobStart AND :jobEnd) AND classifier_job_id = :classifierJobId
) as detection
WHERE stream_ranking < :limit OR stream_classification_ranking < :limit OR
stream_daily_ranking < :dayLimit OR
stream_classification_daily_ranking < :streamClassificationDayLimit;`,
{
replacements,
type: Sequelize.QueryTypes.RAW,
transaction: t
})
}
})
},

down: async (queryInterface, Sequelize) => {
return queryInterface.sequelize.transaction(async t => {
await queryInterface.removeColumn(
'best_detections',
'classification_id',
{ transaction: t }
)
await queryInterface.renameColumn(
'best_detections',
'stream_daily_ranking',
'daily_ranking',
{ transaction: t }
)
await queryInterface.removeColumn(
'best_detections',
'stream_classification_ranking',
{ transaction: t }
)
await queryInterface.removeColumn(
'best_detections',
'stream_classification_daily_ranking',
{ transaction: t }
)
})
}
}
17 changes: 15 additions & 2 deletions core/_models/detections/best-detection.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,25 @@ module.exports = (sequelize, DataTypes) => {
type: DataTypes.FLOAT,
allowNull: false
},
dailyRanking: {
classificationId: {
type: DataTypes.INTEGER,
allowNull: false
allowNull: true
},
streamRanking: {
type: DataTypes.INTEGER,
allowNull: false
},
streamDailyRanking: {
type: DataTypes.INTEGER,
allowNull: false
},
streamClassificationRanking: {
type: DataTypes.INTEGER,
allowNull: false
},
streamClassificationDailyRanking: {
type: DataTypes.INTEGER,
allowNull: false
}
}, {
underscored: true,
Expand All @@ -36,6 +48,7 @@ module.exports = (sequelize, DataTypes) => {
BestDetection.associate = function (models) {
BestDetection.belongsTo(models.Detection, { as: 'detection', foreignKey: 'detection_id' })
BestDetection.belongsTo(models.ClassifierJob, { as: 'classifier_job', foreignKey: 'classifier_job_id' })
BestDetection.belongsTo(models.Classification, { as: 'classification', foreignKey: 'classification_id' })
}
return BestDetection
}
2 changes: 1 addition & 1 deletion core/_models/detections/detection.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ module.exports = (sequelize, DataTypes) => {
Detection.hasOne(models.BestDetection, { as: 'bestDetection', foreignKey: 'detection_id' })
}
Detection.attributes = {
lite: ['stream_id', 'start', 'end', 'confidence'],
lite: ['id', 'stream_id', 'start', 'end', 'confidence'],
full: ['id', 'stream_id', 'classifier_id', 'classification_id', 'classifier_job_id', 'start', 'end', 'confidence', 'review_status']
}
return Detection
Expand Down
54 changes: 28 additions & 26 deletions core/detections/best-detections-summary.int.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,6 @@ let classifierJobs = [
}
]

afterAll(async () => {
await models.BestDetection.destroy({ where: {} })
await models.Detection.destroy({ where: {} })
})

function oneDetection (partialDetection) {
return {
streamId: streams[0].id,
Expand All @@ -123,21 +118,24 @@ let job2Stream1Day2BestDetection
async function makeManyDetections () {
const arbitraryDetections = []
streams.forEach((stream) => {
// we want 6 days of detections, 20 detections each day
for (let day = 1; day < 7; day++) {
let date = new Date('2024-01-01T08:00:00.000Z').setUTCDate(day).valueOf()

for (let i = 0; i < 20; i++) {
arbitraryDetections.push(oneDetection({
streamId: stream.id,
start: new Date(date),
end: new Date(date + 5000),
confidence: 0.7 + Math.random() / 10
}))

date += 60 * 60 * 1000
classifications.forEach((classification) => {
// we want 6 days of detections, 20 detections each day
veckatimest marked this conversation as resolved.
Show resolved Hide resolved
for (let day = 1; day < 7; day++) {
let date = new Date('2024-01-01T08:00:00.000Z').setUTCDate(day).valueOf()

for (let i = 0; i < 10; i++) {
arbitraryDetections.push(oneDetection({
streamId: stream.id,
classificationId: classification.id,
start: new Date(date),
end: new Date(date + 5000),
confidence: 0.7 + Math.random() / 10
}))

date += 60 * 60 * 1000
}
}
}
})
})

stream1Day1BestDetection = oneDetection({
Expand Down Expand Up @@ -211,11 +209,15 @@ async function makeManyDetections () {

beforeAll(async () => {
muteConsole('warn')
await truncateNonBase()
await truncateNonBase(models)

await models.ClassifierJob.destroy({ where: {}, force: true })
await models.Classifier.destroy({ where: {}, force: true })

await models.Project.create(project)
await models.Stream.bulkCreate(streams)
await models.Classification.bulkCreate(classifications)

await models.Classifier.bulkCreate(classifiers)
classifierJobs = await models.ClassifierJob.bulkCreate(classifierJobs)

Expand All @@ -229,7 +231,7 @@ beforeAll(async () => {
describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should return right best per stream detections', async () => {
const query = {
n_per_stream: 2
n_per_chunk: 2
}

const response = await request(app).get(`/${classifierJobs[0].id}/best-detections/summary`).query(query)
Expand All @@ -245,7 +247,7 @@ describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should return right best per day detections', async () => {
const query = {
by_date: true,
n_per_stream: 2,
n_per_chunk: 2,
start: '2024-01-01T00:00:00.000Z',
end: '2024-01-04T00:00:00.000Z'
}
Expand All @@ -263,7 +265,7 @@ describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should respect stream_ids in best per day', async () => {
const query = {
by_date: true,
n_per_stream: 1,
n_per_chunk: 1,
start: '2024-01-01T00:00:00.000Z',
end: '2024-01-04T00:00:00.000Z',
streams: [streams[0].id, streams[1].id]
Expand All @@ -281,7 +283,7 @@ describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should respect stream_ids in best per stream', async () => {
const query = {
by_date: false,
n_per_stream: 2,
n_per_chunk: 2,
streams: [streams[0].id, streams[1].id]
}

Expand All @@ -297,7 +299,7 @@ describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should respect review statuses', async () => {
const query = {
by_date: false,
n_per_stream: 10, // max
n_per_chunk: 10, // max
review_statuses: ['uncertain', 'confirmed']
}

Expand All @@ -316,7 +318,7 @@ describe('GET /classifier-jobs/:id/best-detections/summary', () => {
test('should only find detections in requested job', async () => {
const query = {
by_date: false,
n_per_stream: 10 // max
n_per_chunk: 10 // max
}

const response = await request(app).get(`/${classifierJobs[1].id}/best-detections/summary`).query(query)
Expand Down
Loading
Loading