crisbal · crisbal · Jun 24, 2017 · Jun 22, 2017 · Jun 23, 2017 · Jun 23, 2017
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,5 @@ venv/
 *.wav
 splits/
 __pycache__/
+.idea
+tracks.txt
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Album-Splitter
 
-Do you have a music album as a single file (locally or on YouTube), with all its tracks joined together? Do you want to split that album in its single tracks? Do you want to tag these tracks so your music player can get all the required infos from them?
+Do you have a music album as a single file (locally or on YouTube), with all its tracks joined together? Do you want to split that album in its single tracks? Do you want to tag these tracks so your music player can get all the required info from them?
 
 This script is for you!
 
@@ -19,33 +19,35 @@ This script is for you!
 
 ## Quick guide (from a local album)
 
-1. Open tracks.txt
-2. Add tracks info in this format:
+1. Create a copy of the tracks.txt.example, rename it as tracks.txt and
+   delete the lines starting with #.
+2. Open tracks.txt
+3. Add tracks info in this format:
     * ```<start-time> - <title>```
     * A track on each line
     * See *Examples* section
-3. Run the script
+4. Run the script
     * Basic usage: ```python split.py -mp3 <path/to/your/album.mp3>```
     * More in the *Examples* section
-4. Wait for the splitting process to complete
-5. ????
-6. You will find yout tracks in the `splits` folder
+5. Wait for the splitting process to complete
+6. You will find your tracks in the `splits` folder
 
 ## Quick guide (from a YouTube video)
 
 1. Copy the YouTube URL of the album you want to download and split
 2. Find in the YouTube comments the tracklist with start-time and title
-3. Open tracks.txt
-4. Copy the tracklist in the file, adjusting for the supported format
+3. Create a copy of the tracks.txt.example, rename it as tracks.txt and
+   delete the lines starting with #.
+4. Open tracks.txt
+5. Copy the tracklist in the file, adjusting for the supported format
     * ```<start-time> - <title>```
     * A track on each line
-5. Run the script
+6. Run the script
     * Basic usage: ```python split.py -yt <youtube_url>```
     * More in the *Examples* section
-4. Wait for the Download and for the conversion
-5. Wait for the splitting process to complete
-5. ????
-6. You will find yout tracks in the `splits` folder
+7. Wait for the download and the conversion to complete
+8. Wait for the splitting process to complete
+9. You will find your tracks in the `splits` folder
 
 ## Examples
 
@@ -112,7 +114,7 @@ If you need any help just create an Issue or send me an email at the address you
 If you want to improve the code and submit a pull request feel free to do so.
 
 
-## Licensce
+## Licence
 
 GPL v3
 

diff --git a/requirements.txt b/requirements.txt
@@ -2,4 +2,4 @@ beautifulsoup4==4.4.0
 mutagen==1.31
 pydub==0.14.2
 urlparse3==1.0.3
-youtube-dl==2017.2.17
+youtube-dl==2017.6.18
diff --git a/split.py b/split.py
@@ -15,6 +15,8 @@
 
 import splitutil
 
+from utilities.track_parser import track_parser
+
 
 mdProviders = []
 for module in os.listdir("MetaDataProviders"):
@@ -147,32 +149,22 @@ def my_hook(d):
     tracksStarts = []
     tracksTitles = []
 
-    regex = re.compile("(?P<start>.+)\s*\-\s*(?P<title>.+)")
-
     print("Parsing " + TRACKS_FILE)
     with open(TRACKS_FILE) as tracksF:
-        if DURATION:
-            time_elapsed = '0:00:00'
-            for i, line in enumerate(tracksF):
-                m = regex.match(line)
+        time_elapsed = '0:00:00'
+        for i, line in enumerate(tracksF):
+            curr_start, curr_title = track_parser(line)
+            tTitle = curr_title
 
+            if DURATION:
                 tStart = splitutil.timeToSeconds(time_elapsed)
-                tTitle = m.group('title').strip()
-
-                tracksStarts.append(tStart*1000)
-                tracksTitles.append(tTitle)
-
-                curr_track_time = m.group('start').strip()
-                time_elapsed = splitutil.updateTimeChange(time_elapsed, curr_track_time)
-        else:
-            for i, line in enumerate(tracksF):
-                m = regex.match(line)
+                time_elapsed = splitutil.updateTimeChange(time_elapsed, curr_start)
+            else:
+                tStart = splitutil.timeToSeconds(curr_start)
 
-                tStart = splitutil.timeToSeconds(m.group('start').strip())
-                tTitle = m.group('title').strip()
+            tracksStarts.append(tStart*1000)
+            tracksTitles.append(curr_title)
 
-                tracksStarts.append(tStart*1000)
-                tracksTitles.append(tTitle)
     print("Tracks file parsed")
 
     album = None
@@ -203,7 +195,7 @@ def my_hook(d):
         queue = Queue()
         for index, track in enumerate(tracksTitles):
             queue.put((index, track))
-        # initailize/start threads
+        # initialize/start threads
         threads = []
         for i in range(NUM_THREADS):
             new_thread = Thread(target=thread_func, args=(album, tracksStarts, queue, FOLDER))

diff --git a/tracks.txt b/tracks.txt
diff --git a/tracks.txt.example b/tracks.txt.example
@@ -0,0 +1,22 @@
+# ATTENTION: TO USE, ERASE THE LINES STARTING WITH #
+# This is an example file of how to write the tracks.txt file on your installation:
+#
+# 1) Copy this file and name the copy: tracks.txt
+# 2) Erase any line starting with # from the tracks.txt (not from tracks.txt.example)
+# 3) Write or copy the track list in any of the following formats:
+#    - Dashes (-) can be omitted
+#    - Time can be written in HH:MM:SS or MM:SS format.
+#    - If the list is numbered, tracks can be from 1 to 99 (at the moment)
+#    - If time is over the logical constraints (00 <= HH <= 23, 00 <= MM, SS <= 59) then the out-of-
+#      bounds digit(s) will be omitted:
+#      ex1: MM:SS=67:61 will result in 7:6
+#      ex2: HH:MM:SS=24:05:01 will result in 24:05
+00:00:00 - When I Was Young
+03:28 - Dogs Eating Dogs
+06:58 Disaster
+Boxing Day - 10:40
+Pretty Little Girl 14:39
+1. Proton Lander - 00:16:45
+1. 19:20 Parabola
+1 Crooked Teeth - 23:45
+1 00:26:15 Dozer
diff --git a/utilities/__init__.py b/utilities/__init__.py
diff --git a/utilities/track_parser.py b/utilities/track_parser.py
@@ -0,0 +1,39 @@
+import re
+
+
+# Patterns stripped from the line once the timestamp has been removed.
+NOISE = [
+    r' - ',                    # dash separator between title and time
+    r'^-',                     # leading dash
+    r'(?:-)?[0-9]{1,2} - ',    # track number followed by a dash separator
+    r'(?:-)?[0-9]{1,2}\.',     # track number followed by a dot
+    r'(?:-)?[0-9]{1,2} ',      # bare track number
+]
+
+# HH (00-23) is optional; MM and SS (00-59) are required.
+_TIME_RE = re.compile(r'(?P<start>(?:([01]?\d|2[0-3]):)?([0-5]?\d):([0-5]?\d))')
+
+
+def track_parser(s):
+    """
+    Split a track-list line into its start time and its title.
+
+    The first HH:MM:SS or MM:SS timestamp found in the line is taken as the
+    start time. It is removed from the line, then every match of the NOISE
+    patterns is removed as well and the stripped remainder is the title.
+
+    NOTE: NOISE patterns are applied anywhere in the remainder, so a one- or
+    two-digit number followed by a space or a dot is also dropped when it
+    occurs inside the title itself.
+
+    :param s: Track string to split
+    :return: (time, title) tuple, or ('', '') if the line has no timestamp
+    """
+    match = _TIME_RE.search(s)
+    if match is None:
+        # No timestamp: report the line and return empty values.
+        print('Error occurred when parsing the string: {}'.format(s))
+        return '', ''
+    remainder = _TIME_RE.sub('', s, count=1)
+    title = re.sub('|'.join(NOISE), '', remainder).strip()
+    return match.group('start'), title