Skip to content

Commit

Permalink
#8. Add patterns replacement
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurkushman committed Jul 1, 2018
1 parent d9b23e2 commit c2fe70a
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 107 deletions.
23 changes: 19 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## Detox is a library to detect toxic comments of variable length with different patterns
## Detox is a library to detect toxic comments/posts of variable length with different patterns

[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/arthurkushman/detox/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/arthurkushman/detox/?branch=master)
[![Build Status](https://scrutinizer-ci.com/g/arthurkushman/detox/badges/build.png?b=master)](https://scrutinizer-ci.com/g/arthurkushman/detox/build-status/master)
Expand Down Expand Up @@ -46,19 +46,34 @@ There are no constraints to use all options at once, so u can do the following:
// Phrases object extends Words - just use all inherited methods
$detox = new Phrases(new EnglishSet(), $text);
$detox->processWords();
// change text in Text object
$text->setText('Another text');
// change string in Text object
$text->setString('Another text');
// inject Text object to Phrases
$detox->setText($text);
$detox->processPhrases();
$text->setText('Yet another text');
$text->setString('Yet another text');
$detox->setText($text);
$detox->processPatterns();
if ($detox->getScore() >= 0.5) {
echo 'Toxic text detected';
}
```

### Replace with custom templates and prefix/postfix pre-sets
An additional option that you may need in particular situations is to replace words/phrases with a pre-set template:
```php
$this->text->setPrefix('[');
$this->text->setPostfix(']');
$this->text->setReplaceChars('____');
$this->text->setString('Just piss off dude');
$this->text->setReplaceable(true);
$this->phrases->setText($this->text);

$this->phrases->processPhrases();
echo $this->phrases->getText()->getString(); // output: Just [____] dude
```
By default the replacement pattern is 5 dashes, so you only need to call `$this->text->setReplaceable(true);` before running any processor to get replacement with the default settings.

### Run tests
In root directory (in console) run the following:
```php
Expand Down
168 changes: 114 additions & 54 deletions clover.xml
Original file line number Diff line number Diff line change
@@ -1,77 +1,137 @@
<?xml version="1.0" encoding="UTF-8"?>
<coverage generated="1530365534">
<project timestamp="1530365534">
<coverage generated="1530446422">
<project timestamp="1530446422">
<package name="detox\core">
<file name="/srv/detox/src/core/Phrases.php">
<class name="detox\core\Phrases" namespace="detox\core">
<metrics complexity="5" methods="1" coveredmethods="1" conditionals="0" coveredconditionals="0" statements="9" coveredstatements="9" elements="10" coveredelements="10"/>
<metrics complexity="6" methods="1" coveredmethods="1" conditionals="0" coveredconditionals="0" statements="10" coveredstatements="10" elements="11" coveredelements="11"/>
</class>
<line num="10" type="method" name="processPhrases" visibility="public" complexity="5" crap="5" count="1"/>
<line num="13" type="stmt" count="1"/>
<line num="18" type="stmt" count="1"/>
<line num="19" type="stmt" count="1"/>
<line num="20" type="stmt" count="1"/>
<line num="21" type="stmt" count="1"/>
<line num="23" type="stmt" count="1"/>
<line num="24" type="stmt" count="1"/>
<line num="27" type="stmt" count="1"/>
<line num="31" type="stmt" count="1"/>
<metrics loc="31" ncloc="20" classes="1" methods="1" coveredmethods="1" conditionals="0" coveredconditionals="0" statements="9" coveredstatements="9" elements="10" coveredelements="10"/>
<line num="10" type="method" name="processPhrases" visibility="public" complexity="6" crap="6" count="2"/>
<line num="13" type="stmt" count="2"/>
<line num="18" type="stmt" count="2"/>
<line num="19" type="stmt" count="2"/>
<line num="20" type="stmt" count="2"/>
<line num="21" type="stmt" count="2"/>
<line num="22" type="stmt" count="2"/>
<line num="23" type="stmt" count="2"/>
<line num="28" type="stmt" count="2"/>
<line num="29" type="stmt" count="2"/>
<line num="31" type="stmt" count="2"/>
<metrics loc="31" ncloc="22" classes="1" methods="1" coveredmethods="1" conditionals="0" coveredconditionals="0" statements="10" coveredstatements="10" elements="11" coveredelements="11"/>
</file>
<file name="/srv/detox/src/core/Words.php">
<class name="detox\core\Words" namespace="detox\core">
<metrics complexity="14" methods="7" coveredmethods="7" conditionals="0" coveredconditionals="0" statements="25" coveredstatements="25" elements="32" coveredelements="32"/>
<class name="detox\core\Words" namespace="detox\core" fullPackage="detox" package="detox">
<metrics complexity="23" methods="11" coveredmethods="10" conditionals="0" coveredconditionals="0" statements="44" coveredstatements="42" elements="55" coveredelements="52"/>
</class>
<line num="24" type="method" name="__construct" visibility="public" complexity="1" crap="1" count="5"/>
<line num="26" type="stmt" count="5"/>
<line num="27" type="stmt" count="5"/>
<line num="32" type="method" name="processWords" visibility="public" complexity="5" crap="5" count="2"/>
<line num="35" type="stmt" count="2"/>
<line num="40" type="stmt" count="2"/>
<line num="41" type="stmt" count="2"/>
<line num="42" type="stmt" count="2"/>
<line num="43" type="stmt" count="2"/>
<line num="45" type="stmt" count="2"/>
<line num="46" type="stmt" count="1"/>
<line num="49" type="stmt" count="2"/>
<line num="53" type="stmt" count="2"/>
<line num="58" type="method" name="processPatterns" visibility="public" complexity="4" crap="4" count="2"/>
<line num="60" type="stmt" count="2"/>
<line num="33" type="method" name="__construct" visibility="public" complexity="1" crap="1" count="7"/>
<line num="35" type="stmt" count="7"/>
<line num="36" type="stmt" count="7"/>
<line num="37" type="stmt" count="7"/>
<line num="42" type="method" name="processWords" visibility="public" complexity="6" crap="6" count="3"/>
<line num="45" type="stmt" count="3"/>
<line num="50" type="stmt" count="3"/>
<line num="51" type="stmt" count="3"/>
<line num="52" type="stmt" count="3"/>
<line num="53" type="stmt" count="3"/>
<line num="54" type="stmt" count="3"/>
<line num="55" type="stmt" count="3"/>
<line num="60" type="stmt" count="3"/>
<line num="61" type="stmt" count="2"/>
<line num="62" type="stmt" count="2"/>
<line num="63" type="stmt" count="1"/>
<line num="64" type="stmt" count="1"/>
<line num="65" type="stmt" count="1"/>
<line num="66" type="stmt" count="1"/>
<line num="68" type="stmt" count="2"/>
<line num="73" type="method" name="setData" visibility="public" complexity="1" crap="1" count="1"/>
<line num="75" type="stmt" count="1"/>
<line num="76" type="stmt" count="1"/>
<line num="81" type="method" name="getScore" visibility="public" complexity="1" crap="1" count="5"/>
<line num="83" type="stmt" count="5"/>
<line num="89" type="method" name="setScore" visibility="public" complexity="1" crap="1" count="3"/>
<line num="91" type="stmt" count="3"/>
<line num="92" type="stmt" count="3"/>
<line num="98" type="method" name="addLowSpaces" visibility="protected" complexity="1" crap="1" count="5"/>
<line num="100" type="stmt" count="5"/>
<metrics loc="101" ncloc="69" classes="1" methods="7" coveredmethods="7" conditionals="0" coveredconditionals="0" statements="25" coveredstatements="25" elements="32" coveredelements="32"/>
<line num="63" type="stmt" count="3"/>
<line num="68" type="method" name="processPatterns" visibility="public" complexity="7" crap="7.10" count="2"/>
<line num="70" type="stmt" count="2"/>
<line num="71" type="stmt" count="2"/>
<line num="72" type="stmt" count="2"/>
<line num="73" type="stmt" count="2"/>
<line num="74" type="stmt" count="1"/>
<line num="77" type="stmt" count="2"/>
<line num="78" type="stmt" count="2"/>
<line num="79" type="stmt" count="1"/>
<line num="80" type="stmt" count="1"/>
<line num="81" type="stmt" count="0"/>
<line num="84" type="stmt" count="2"/>
<line num="85" type="stmt" count="2"/>
<line num="86" type="stmt" count="1"/>
<line num="87" type="stmt" count="1"/>
<line num="88" type="stmt" count="0"/>
<line num="91" type="stmt" count="2"/>
<line num="96" type="method" name="setData" visibility="public" complexity="1" crap="1" count="1"/>
<line num="98" type="stmt" count="1"/>
<line num="99" type="stmt" count="1"/>
<line num="104" type="method" name="getScore" visibility="public" complexity="1" crap="1" count="5"/>
<line num="106" type="stmt" count="5"/>
<line num="112" type="method" name="setScore" visibility="public" complexity="1" crap="1" count="3"/>
<line num="114" type="stmt" count="3"/>
<line num="115" type="stmt" count="3"/>
<line num="121" type="method" name="addLowSpaces" visibility="protected" complexity="1" crap="1" count="7"/>
<line num="123" type="stmt" count="7"/>
<line num="129" type="method" name="replace" visibility="protected" complexity="1" crap="1" count="3"/>
<line num="132" type="stmt" count="3"/>
<line num="133" type="stmt" count="3"/>
<line num="134" type="stmt" count="3"/>
<line num="136" type="method" name="replaceMatches" visibility="private" complexity="2" crap="2" count="1"/>
<line num="139" type="stmt" count="1"/>
<line num="140" type="stmt" count="1"/>
<line num="142" type="stmt" count="1"/>
<line num="148" type="method" name="setText" visibility="public" complexity="1" crap="1" count="7"/>
<line num="150" type="stmt" count="7"/>
<line num="151" type="stmt" count="7"/>
<line num="156" type="method" name="getText" visibility="public" complexity="1" crap="1" count="3"/>
<line num="158" type="stmt" count="3"/>
<metrics loc="159" ncloc="109" classes="1" methods="11" coveredmethods="10" conditionals="0" coveredconditionals="0" statements="44" coveredstatements="42" elements="55" coveredelements="52"/>
</file>
</package>
<package name="detox\dataset">
<file name="/srv/detox/src/dataset/EnglishSet.php">
<class name="detox\dataset\EnglishSet" namespace="detox\dataset">
<metrics complexity="2" methods="2" coveredmethods="2" conditionals="0" coveredconditionals="0" statements="2" coveredstatements="2" elements="4" coveredelements="4"/>
</class>
<line num="56" type="method" name="getWords" visibility="public" complexity="1" crap="1" count="2"/>
<line num="58" type="stmt" count="2"/>
<line num="61" type="method" name="getPhrases" visibility="public" complexity="1" crap="1" count="1"/>
<line num="63" type="stmt" count="1"/>
<metrics loc="64" ncloc="57" classes="1" methods="2" coveredmethods="2" conditionals="0" coveredconditionals="0" statements="2" coveredstatements="2" elements="4" coveredelements="4"/>
<line num="61" type="method" name="getWords" visibility="public" complexity="1" crap="1" count="3"/>
<line num="63" type="stmt" count="3"/>
<line num="66" type="method" name="getPhrases" visibility="public" complexity="1" crap="1" count="2"/>
<line num="68" type="stmt" count="2"/>
<metrics loc="69" ncloc="62" classes="1" methods="2" coveredmethods="2" conditionals="0" coveredconditionals="0" statements="2" coveredstatements="2" elements="4" coveredelements="4"/>
</file>
</package>
<file name="/srv/detox/src/dataset/SetContract.php">
<metrics loc="11" ncloc="11" classes="0" methods="0" coveredmethods="0" conditionals="0" coveredconditionals="0" statements="0" coveredstatements="0" elements="0" coveredelements="0"/>
</file>
<metrics files="4" loc="207" ncloc="157" classes="3" methods="10" coveredmethods="10" conditionals="0" coveredconditionals="0" statements="36" coveredstatements="36" elements="46" coveredelements="46"/>
<package name="detox\source">
<file name="/srv/detox/src/source/Text.php">
<class name="detox\source\Text" namespace="detox\source">
<metrics complexity="11" methods="11" coveredmethods="11" conditionals="0" coveredconditionals="0" statements="17" coveredstatements="17" elements="28" coveredelements="28"/>
</class>
<line num="13" type="method" name="__construct" visibility="public" complexity="1" crap="1" count="9"/>
<line num="15" type="stmt" count="9"/>
<line num="16" type="stmt" count="9"/>
<line num="18" type="method" name="setString" visibility="public" complexity="1" crap="1" count="8"/>
<line num="20" type="stmt" count="8"/>
<line num="21" type="stmt" count="8"/>
<line num="23" type="method" name="getString" visibility="public" complexity="1" crap="1" count="8"/>
<line num="25" type="stmt" count="8"/>
<line num="31" type="method" name="setReplaceChars" visibility="public" complexity="1" crap="1" count="3"/>
<line num="33" type="stmt" count="3"/>
<line num="34" type="stmt" count="3"/>
<line num="39" type="method" name="getReplaceChars" visibility="public" complexity="1" crap="1" count="4"/>
<line num="41" type="stmt" count="4"/>
<line num="47" type="method" name="setPrefix" visibility="public" complexity="1" crap="1" count="3"/>
<line num="49" type="stmt" count="3"/>
<line num="50" type="stmt" count="3"/>
<line num="55" type="method" name="getPostfix" visibility="public" complexity="1" crap="1" count="4"/>
<line num="57" type="stmt" count="4"/>
<line num="63" type="method" name="setPostfix" visibility="public" complexity="1" crap="1" count="3"/>
<line num="65" type="stmt" count="3"/>
<line num="66" type="stmt" count="3"/>
<line num="71" type="method" name="getPrefix" visibility="public" complexity="1" crap="1" count="4"/>
<line num="73" type="stmt" count="4"/>
<line num="79" type="method" name="isReplaceable" visibility="public" complexity="1" crap="1" count="7"/>
<line num="81" type="stmt" count="7"/>
<line num="87" type="method" name="setReplaceable" visibility="public" complexity="1" crap="1" count="3"/>
<line num="89" type="stmt" count="3"/>
<line num="90" type="stmt" count="3"/>
<metrics loc="90" ncloc="66" classes="1" methods="11" coveredmethods="11" conditionals="0" coveredconditionals="0" statements="17" coveredstatements="17" elements="28" coveredelements="28"/>
</file>
</package>
<metrics files="5" loc="360" ncloc="270" classes="4" methods="25" coveredmethods="24" conditionals="0" coveredconditionals="0" statements="73" coveredstatements="71" elements="98" coveredelements="95"/>
</project>
</coverage>
14 changes: 7 additions & 7 deletions src/core/Phrases.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Phrases extends Words
public function processPhrases()
{
// to match lower case letters in words set array
$lowerSource = $this->addLowSpaces($this->text->getText());
$lowerSource = $this->addLowSpaces($this->text->getString());
/**
* @var string $points
* @var array $phrases
Expand All @@ -19,14 +19,14 @@ public function processPhrases()
foreach ($phrases as $phrase) {
if (mb_strpos($lowerSource, ' ' . $phrase . ' ') !== false) {
$this->score += (float)$points;
}
if ($this->score >= self::MAX_SCORE) {
$this->score = self::MAX_SCORE;

// we don't need to iterate more with max score
return;
if ($this->text->isReplaceable()) {
$this->replace($phrase);
}
}
}
}
if ($this->score >= self::MAX_SCORE) {
$this->score = self::MAX_SCORE;
}
}
}
59 changes: 45 additions & 14 deletions src/core/Words.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public function __construct(SetContract $set, Text $text)
public function processWords() : void
{
// to match lower case letters in words set array
$lowerSource = $this->addLowSpaces($this->text->getText());
$lowerSource = $this->addLowSpaces($this->text->getString());
/**
* @var string $points
* @var array $words
Expand All @@ -51,29 +51,42 @@ public function processWords() : void
foreach ($words as $word) {
if (mb_strpos($lowerSource, ' ' . $word . ' ') !== false) {
$this->score += (float)$points;
}
if ($this->score >= self::MAX_SCORE) {
$this->score = self::MAX_SCORE;

// we don't need to iterate more with max score
return;
if ($this->text->isReplaceable()) {
$this->replace($word);
}
}
}
}
if ($this->score >= self::MAX_SCORE) {
$this->score = self::MAX_SCORE;
}
}

/**
* Finds bad words masked with asterisks, then updates the score and (optionally) replaces them
*/
public function processPatterns() : void
{
$lowerSource = $this->addLowSpaces($this->text->getText());
if (preg_match('/\s(([\w]+)[\*]+([\w]+))\s/', $lowerSource) === 1) {
$this->score += self::ASTERISKS_MIDDLE;
} else if (preg_match('/\s(([\w]+)[\*]+)/', $lowerSource) === 1) {
$this->score += self::ASTERISKS_RIGHT;
} else if (preg_match('/([\*]+([\w]+))\s/', $lowerSource) === 1) {
$this->score += self::ASTERISKS_LEFT;
$lowerSource = $this->addLowSpaces($this->text->getString());
if (preg_match_all('/\s(([\w]+)[\*]+([\w]+))\s/', $lowerSource, $matches) > 0) {
$this->score += (self::ASTERISKS_MIDDLE * count($matches[0]));
if ($this->text->isReplaceable()) {
$this->replaceMatches($matches);
}
}
$lowerSource = $this->addLowSpaces($this->text->getString());
if (preg_match_all('/\s(([\w]+)[\*]+)\s/', $lowerSource, $matches) > 0) {
$this->score += (self::ASTERISKS_RIGHT * count($matches[0]));
if ($this->text->isReplaceable()) {
$this->replaceMatches($matches);
}
}
$lowerSource = $this->addLowSpaces($this->text->getString());
if (preg_match_all('/\s([\*]+([\w]+))\s/', $lowerSource, $matches) > 0) {
$this->score += (self::ASTERISKS_LEFT * count($matches[0]));
if ($this->text->isReplaceable()) {
$this->replaceMatches($matches);
}
}
}

Expand Down Expand Up @@ -110,6 +123,24 @@ protected function addLowSpaces(string $str) : string
return ' ' . mb_strtolower($str) . ' ';
}

/**
 * Replaces every occurrence of a word/phrase in the text with the configured
 * template: prefix + replace chars + postfix.
 *
 * Matching is case-insensitive, so the phrase (taken from the lower-cased
 * source) is still found in the original text, while the rest of the text
 * keeps its original letter case (WoRd stays WoRd).
 *
 * @param string $phrase word or phrase to replace
 */
protected function replace(string $phrase) : void
{
    // an empty needle would match between every character of the text
    if ($phrase === '') {
        return;
    }

    $template = $this->text->getPrefix() . $this->text->getReplaceChars() . $this->text->getPostfix();
    $source = $this->text->getString();

    // A callback is used instead of a replacement string so that "$1" or "\1"
    // inside a user-supplied template is inserted literally, not as a back-reference.
    $replaced = preg_replace_callback(
        '/' . preg_quote($phrase, '/') . '/iu',
        static function () use ($template) : string {
            return $template;
        },
        $source
    );

    // preg_* returns null on failure (e.g. the text is not valid UTF-8);
    // fall back to a byte-wise case-insensitive replacement in that case.
    if ($replaced === null) {
        $replaced = str_ireplace($phrase, $template, $source);
    }

    $this->text->setString($replaced);
}

/**
 * Replaces every word found by a preg_match_all() call in processPatterns().
 *
 * @param array $matches result array filled by preg_match_all() (pattern order)
 */
private function replaceMatches(array $matches) : void
{
    // Group 0 is the full match including the whitespace the patterns anchor on;
    // group 1 is the bare word. Replacing group 0 would swallow the surrounding
    // spaces and miss words at the very start/end of the text, because the
    // padding spaces exist only in the lowered copy, not in the stored string.
    $words = $matches[1] ?? $matches[0] ?? [];

    // the same word may be matched several times; one replace() call covers all of them
    foreach (array_unique($words) as $word) {
        $this->replace($word);
    }
}

/**
* Setter for convenient DI with Text object and its properties
* @param Text $text
Expand Down
Loading

0 comments on commit c2fe70a

Please sign in to comment.