test.py

# -*- coding: utf-8 -*-

import unittest
from slugify import slugify
from slugify import smart_truncate


class TestSlugification(unittest.TestCase):

    def test_extraneous_seperators(self):

        txt = "This is a test ---"
        r = slugify(txt)
        self.assertEqual(r, "this-is-a-test")

        txt = "___This is a test ---"
        r = slugify(txt)
        self.assertEqual(r, "this-is-a-test")

        txt = "___This is a test___"
        r = slugify(txt)
        self.assertEqual(r, "this-is-a-test")

    def test_non_word_characters(self):
        txt = "This -- is a ## test ---"
        r = slugify(txt)
        self.assertEqual(r, "this-is-a-test")

    def test_phonetic_conversion_of_eastern_scripts(self):
        txt = '影師嗎'
        r = slugify(txt)
        self.assertEqual(r, "ying-shi-ma")

    def test_accented_text(self):
        txt = 'C\'est déjà l\'été.'
        r = slugify(txt)
        self.assertEqual(r, "c-est-deja-l-ete")

        txt = 'Nín hǎo. Wǒ shì zhōng guó rén'
        r = slugify(txt)
        self.assertEqual(r, "nin-hao-wo-shi-zhong-guo-ren")

    def test_accented_text_with_non_word_characters(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt)
        self.assertEqual(r, "jaja-lol-mememeoo-a")

    def test_cyrillic_text(self):
        txt = 'Компьютер'
        r = slugify(txt)
        self.assertEqual(r, "kompiuter")

    def test_max_length(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=9)
        self.assertEqual(r, "jaja-lol")

        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=15)
        self.assertEqual(r, "jaja-lol-mememe")

    def test_max_length_cutoff_not_required(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=50)
        self.assertEqual(r, "jaja-lol-mememeoo-a")

    def test_word_boundary(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=15, word_boundary=True)
        self.assertEqual(r, "jaja-lol-a")

        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=17, word_boundary=True)
        self.assertEqual(r, "jaja-lol-mememeoo")

        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=18, word_boundary=True)
        self.assertEqual(r, "jaja-lol-mememeoo")

        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=19, word_boundary=True)
        self.assertEqual(r, "jaja-lol-mememeoo-a")

    def test_custom_separator(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=20, word_boundary=True, separator=".")
        self.assertEqual(r, "jaja.lol.mememeoo.a")

    def test_multi_character_separator(self):
        txt = 'jaja---lol-méméméoo--a'
        r = slugify(txt, max_length=20, word_boundary=True, separator="ZZZZZZ")
        self.assertEqual(r, "jajaZZZZZZlolZZZZZZmememeooZZZZZZa")

    def test_save_order(self):
        txt = 'one two three four five'
        r = slugify(txt, max_length=13, word_boundary=True, save_order=True)
        self.assertEqual(r, "one-two-three")

        txt = 'one two three four five'
        r = slugify(txt, max_length=13, word_boundary=True, save_order=False)
        self.assertEqual(r, "one-two-three")

        txt = 'one two three four five'
        r = slugify(txt, max_length=12, word_boundary=True, save_order=False)
        self.assertEqual(r, "one-two-four")

        txt = 'one two three four five'
        r = slugify(txt, max_length=12, word_boundary=True, save_order=True)
        self.assertEqual(r, "one-two")

    def test_stopword_removal(self):
        txt = 'this has a stopword'
        r = slugify(txt, stopwords=['stopword'])
        self.assertEqual(r, 'this-has-a')

    def test_stopword_removal_casesensitive(self):
        txt = 'thIs Has a stopword Stopword'
        r = slugify(txt, stopwords=['Stopword'], lowercase=False)
        self.assertEqual(r, 'thIs-Has-a-stopword')

    def test_multiple_stopword_occurances(self):
        txt = 'the quick brown fox jumps over the lazy dog'
        r = slugify(txt, stopwords=['the'])
        self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')

    def test_differently_cased_stopword_match(self):
        txt = 'Foo A FOO B foo C'
        r = slugify(txt, stopwords=['foo'])
        self.assertEqual(r, 'a-b-c')

        txt = 'Foo A FOO B foo C'
        r = slugify(txt, stopwords=['FOO'])
        self.assertEqual(r, 'a-b-c')

    def test_multiple_stopwords(self):
        txt = 'the quick brown fox jumps over the lazy dog in a hurry'
        r = slugify(txt, stopwords=['the', 'in', 'a', 'hurry'])
        self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')

    def test_stopwords_with_different_separator(self):
        txt = 'the quick brown fox jumps over the lazy dog'
        r = slugify(txt, stopwords=['the'], separator=' ')
        self.assertEqual(r, 'quick brown fox jumps over lazy dog')

    def test_html_entities(self):
        txt = 'foo &amp; bar'
        r = slugify(txt)
        self.assertEqual(r, 'foo-bar')

    def test_starts_with_number(self):
        txt = '10 amazing secrets'
        r = slugify(txt)
        self.assertEqual(r, '10-amazing-secrets')

    def test_contains_numbers(self):
        txt = 'buildings with 1000 windows'
        r = slugify(txt)
        self.assertEqual(r, 'buildings-with-1000-windows')

    def test_ends_with_number(self):
        txt = 'recipe number 3'
        r = slugify(txt)
        self.assertEqual(r, 'recipe-number-3')

    def test_numbers_only(self):
        txt = '404'
        r = slugify(txt)
        self.assertEqual(r, '404')

    def test_numbers_and_symbols(self):
        txt = '1,000 reasons you are #1'
        r = slugify(txt)
        self.assertEqual(r, '1000-reasons-you-are-1')

    def test_regex_pattern_keep_underscore(self):
        txt = "___This is a test___"
        regex_pattern = r'[^-a-z0-9_]+'
        r = slugify(txt, regex_pattern=regex_pattern)
        self.assertEqual(r, "___this-is-a-test___")

    def test_regex_pattern_keep_underscore_with_underscore_as_separator(self):
        """
        The regex_pattern turns the power to the caller.
        Hence the caller must ensure that a custom separator doesn't clash
        with the regex_pattern.
        """
        txt = "___This is a test___"
        regex_pattern = r'[^-a-z0-9_]+'
        r = slugify(txt, separator='_', regex_pattern=regex_pattern)
        self.assertNotEqual(r, "_this_is_a_test_")

    def test_replacements(self):
        txt = '10 | 20 %'
        r = slugify(txt, replacements=[['|', 'or'], ['%', 'percent']])
        self.assertEqual(r, "10-or-20-percent")

        txt = 'I ♥ 🦄'
        r = slugify(txt, replacements=[['♥', 'amour'], ['🦄', 'licorne']])
        self.assertEqual(r, "i-amour-licorne")


class TestUtils(unittest.TestCase):

    def test_smart_truncate_no_max_length(self):
        txt = '1,000 reasons you are #1'
        r = smart_truncate(txt)
        self.assertEqual(r, txt)

    def test_smart_truncate_no_seperator(self):
        txt = '1,000 reasons you are #1'
        r = smart_truncate(txt, max_length=100, separator='_')
        self.assertEqual(r, txt)


class TestUtils(unittest.TestCase):

    def test_smart_truncate_no_max_length(self):
        txt = '1,000 reasons you are #1'
        r = smart_truncate(txt)
        self.assertEqual(r, txt)

    def test_smart_truncate_no_seperator(self):
        txt = '1,000 reasons you are #1'
        r = smart_truncate(txt, max_length=100, separator='_')
        self.assertEqual(r, txt)


if __name__ == '__main__':
    unittest.main()