-
Notifications
You must be signed in to change notification settings - Fork 0
/
export.mjs
74 lines (61 loc) · 1.71 KB
/
export.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import fs from "fs";
import mongodb from "mongodb";
/**
 * Export script: reads every document of the `contents` collection (database
 * `tumn-cat` on mongodb://localhost:27017) that has a `filter` field and writes
 * several JSON views of it under ./result:
 *
 *   - mature.json / swearword.json / hatespeech.json: per-word boolean labels
 *   - multilabel.json: per-word [mature, swearword, hatespeech] boolean triples
 *   - words.json: for each label, an object whose keys are the flagged words
 *   - embedding.json: the `content` arrays only
 *
 * Each document is read as two parallel arrays: `content` (words) and `filter`
 * (one numeric code per word). The codes are decoded as a bit field:
 * 1 = mature, 2 = swearword, 4 = hatespeech.
 * NOTE(review): codes are presumably limited to 0..7 — confirm against the
 * labelling tool. The hatespeech test is kept as `v >= 4` exactly as before.
 */
(async () => {
  const { MongoClient } = mongodb;
  const client = await MongoClient.connect('mongodb://localhost:27017', { useNewUrlParser: true });

  try {
    const collection = client.db('tumn-cat').collection('contents');
    const sentences = await collection.find({ filter: { $exists: true } }).toArray();

    // writeFileSync does not create parent directories; make sure the output
    // directory exists so a fresh checkout does not fail with ENOENT.
    fs.mkdirSync('./result', { recursive: true });

    // Serialise `data` to ./result/<name>.json. `indent` is forwarded to
    // JSON.stringify; leaving it undefined yields compact output.
    const writeJson = (name, data, indent) => {
      fs.writeFileSync(`./result/${name}.json`, JSON.stringify(data, null, indent));
    };

    // Build [{content, filter}] where every filter code is replaced by toLabel(code).
    const relabel = (toLabel) =>
      sentences.map(({ content, filter }) => ({
        content,
        filter: filter.map((code) => toLabel(code)),
      }));

    const isMature = (code) => code % 2 >= 1;
    const isSwearword = (code) => code % 4 >= 2;
    const isHateSpeech = (code) => code >= 4;

    writeJson('mature', relabel(isMature));
    writeJson('swearword', relabel(isSwearword));
    writeJson('hatespeech', relabel(isHateSpeech));
    writeJson('multilabel', relabel((code) => [isMature(code), isSwearword(code), isHateSpeech(code)]));

    // Per-label vocabulary of flagged words. The key 'swearwords' (plural) is
    // part of the existing words.json format and intentionally differs from
    // the swearword.json file name.
    const dataList = {};
    ['mature', 'swearwords', 'hatespeech'].forEach((label, bit) => {
      const mask = 1 << bit;
      const flagged = {};
      for (const { content, filter } of sentences) {
        content.forEach((word, wi) => {
          if (filter[wi] & mask) {
            flagged[word] = true;
          }
        });
      }
      dataList[label] = flagged;
    });
    writeJson('words', dataList, '\t');

    // The embedding export uses the same query as above, so the documents
    // already fetched are reused instead of hitting the database again.
    writeJson('embedding', sentences.map(({ content }) => ({ content })));

    console.log("Finished!");
  } finally {
    // Without this the driver's open sockets keep the Node process alive
    // after the export has completed.
    await client.close();
  }
})().catch((err) => {
  // Surface failures explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});