# data_handling.py

"""This module provides the classes and functions needed to handle the data used to generate a dataset for activity
recognition.
"""
import os
import csv
import platform

from datetime import datetime
from datetime import timedelta


class TimecodePresentError(Exception):
    """Error signalling that a parameter dictionary already holds timecodes.

    Parameters
    ----------
    message : object
        Description of the problem; kept on the instance as ``message``.
    """

    def __init__(self, message):
        # Keep the description reachable as an attribute for the callers.
        self.message = message

    def __str__(self):
        # The textual form is the repr() of the message (a str message is shown quoted).
        return '{!r}'.format(self.message)


class TooFewColumnInFile(Exception):
    """Error signalling that an imported CSV file has no usable data column.

    Parameters
    ----------
    message : object
        Description of the problem; kept on the instance as ``message``.
    """

    def __init__(self, message):
        # Keep the description reachable as an attribute for the callers.
        self.message = message

    def __str__(self):
        # The textual form is the repr() of the message (a str message is shown quoted).
        return '{!r}'.format(self.message)


def _parse_clock_timecode(value: str) -> datetime:
    """Convert an 'HH:MM:SS:mmm' timecode into a datetime placed on 1970-01-01."""
    parts = value.split(':')
    # The last field holds milliseconds while datetime() expects microseconds.
    return datetime(1970, 1, 1, int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3]) * 1000)


def import_unified_file(path_to_file: str) -> dict:
    """Import the parameters from a unified CSV file.

    It will read the file, create an array of each column_value (ordered) and store the array in a dictionary with the
    key set to the column_name (upper_case).
    At least one column_name different from 'TIMECODE' and 'TAG' must be present in the file.

    The file has to be correctly formatted :
        1st line : column_name1, column_name2, column_name3, ...
        following lines : value1, value2, value3, ...

    Rows shorter than the header are padded with 'N/A'; fields found beyond the header width are ignored.
    Empty 'TIMECODE' cells are skipped. When the first remaining timecode is made only of numeric characters, every
    timecode is read as a number of seconds since 1970-01-01; otherwise every timecode is read as 'HH:MM:SS:mmm'.

    Parameters
    ----------
    path_to_file : str
        The absolute path to the CSV file to import.

    Returns
    -------
        parameter_dict : dict
            The imported parameters as a dictionary built as follows:
                parameter_dict['COLUMN_NAME'] = [value_row1, value_row2, value_row3, ...]
            The values are the strings read from the file, except for 'TIMECODE' whose values are converted to
            `datetime` objects.
            The keys 'TIMECODE' and 'TAG' are added if not present in the CSV file ('TIMECODE' as an empty list,
            'TAG' as one empty string per imported row).

    Raises
    ------
    TypeError
        You have provided a wrong parameter.
    FileNotFoundError
        The given file does not exist or the path is mis-formatted.
    IOError
        The file can't be opened because of an OS restriction or error.
    csv.Error
        If the csv library builtin functions fail.
    TooFewColumnInFile
        When the CSV file contains too few columns (none, or only "TIMECODE" and/or "TAG").
    ValueError, IndexError
        When a timecode matches neither of the two supported formats.

    """
    # Test if the given parameters have the right type
    if not isinstance(path_to_file, str):
        raise TypeError('The "path_to_file" must be a string', type(path_to_file))

    reserved_keys = ('TIMECODE', 'TAG')
    epoch = datetime(1970, 1, 1)

    # Normalize path for Windows (startswith() is safe on an empty string, indexing is not)
    if platform.system() == 'Windows' and path_to_file.startswith('/'):
        path_to_file = path_to_file[1:]

    # Read the input file
    with open(path_to_file, newline='') as unified_file:
        reader = csv.DictReader(unified_file, restkey='UnknownName', restval='N/A')

        # One empty list per column; an empty file has no header (fieldnames is None)
        parameter_dict = {str(fieldname).upper(): [] for fieldname in reader.fieldnames or ()}

        # Test if the CSV file contains enough data
        data_keys = [key for key in parameter_dict if key not in reserved_keys]
        if not data_keys:
            raise TooFewColumnInFile('Their is too few parameter to import in the given file.')

        # Add the values to the dictionary
        for row in reader:
            for key, value in row.items():
                key = str(key).upper()
                if key not in parameter_dict:
                    # Surplus fields of an over-long row are gathered under the restkey, which is not a column of
                    # the file: drop them instead of failing with a KeyError.
                    continue
                if key == 'TIMECODE' and value == '':
                    # Empty timecodes are left out so they can be generated afterwards
                    continue
                parameter_dict[key].append(value)

    # Add the default entries if not present in the imported CSV file
    raw_timecodes = parameter_dict.setdefault('TIMECODE', [])
    if raw_timecodes:
        # The converted values must be stored back: rebinding a loop variable would silently discard them.
        if raw_timecodes[0].isnumeric():
            # epoch + timedelta gives the same naive UTC datetime as the deprecated datetime.utcfromtimestamp()
            parameter_dict['TIMECODE'] = [epoch + timedelta(seconds=float(value)) for value in raw_timecodes]
        else:
            parameter_dict['TIMECODE'] = [_parse_clock_timecode(value) for value in raw_timecodes]

    if 'TAG' not in parameter_dict:
        # Every data column holds one value per row, so the first one gives the row count
        parameter_dict['TAG'] = [''] * len(parameter_dict[data_keys[0]])

    return parameter_dict


def generate_timecodes(parameter_dict: dict, sampling_rate: int, force=False) -> None:
    """Generate the data timecodes with a specific sampling rate

    It will generate the timecodes for the `parameter_dict` passed as parameter using a given `sampling_rate`.
    One timecode is generated per value of the first column other than 'TIMECODE' and 'TAG', starting at
    1970-01-01 00:00:00. The timecodes are stored in place in ``parameter_dict['TIMECODE']``.
    Existing timecodes can be overwritten by changing the `force` parameter.

    Parameters
    ----------
    parameter_dict : dict
        A parameter dictionary generated by one of the import function.
    sampling_rate : int
        The sampling rate of the given data (record per second).
    force : bool
        Set to True to overwrite the existing timecode.

    Raises
    ------
    TypeError
        You have provided a wrong parameter.
    TimecodePresentError
        The given `parameter_dict` already have timecodes set and `force` is False.
    IndexError
        The given `parameter_dict` holds no column other than 'TIMECODE' and 'TAG'.
    ZeroDivisionError
        The `sampling_rate` is 0.

    Notes
    -----
    The time delta (μs) between each timecode will be calculated with the formula :
        timedelta = (1 / sampling_rate) * 1000000
    """

    # Test if the given parameters have the right type
    if not isinstance(parameter_dict, dict) or 'TIMECODE' not in parameter_dict:
        raise TypeError('The "parameter_dict" must be a dictionary generated by one of the import function.')
    # bool is a subclass of int: keep rejecting it explicitly
    if isinstance(sampling_rate, bool) or not isinstance(sampling_rate, int):
        raise TypeError('The "sampling_rate" must be an int.')
    if not isinstance(force, bool):
        raise TypeError('The "force" must be a boolean.')

    # Refuse to touch existing timecodes unless explicitly asked to
    timecodes = parameter_dict['TIMECODE']
    if timecodes and not force:
        raise TimecodePresentError('The "TIMECODE" parameter already contains entries.')

    # Get the number of timecodes to generate from the first data column (insertion order, hence deterministic)
    data_keys = [key for key in parameter_dict if key not in ('TIMECODE', 'TAG')]
    amount = len(parameter_dict[data_keys[0]])

    # Generate the timecodes
    start = datetime(1970, 1, 1, 0, 0, 0, 0)
    delta = timedelta(microseconds=(1 / sampling_rate) * 1000000)
    # Replace the content in place: when forcing, the previous timecodes must be dropped rather than extended.
    timecodes[:] = [start + index * delta for index in range(amount)]


def export_dataset(parameter_dict: dict, selected_parameter: list, output_dir: str) -> None:
    """Export the given parameter to a dataset

    It writes a single 'dataset.csv' file in `output_dir`. The columns are 'TIMECODE', then every entry of
    `selected_parameter` (in order), then 'TAG'. One row is written per entry of ``parameter_dict['TIMECODE']``.
    Values that are `datetime` objects are written as 'HH:MM:SS:' followed by the milliseconds (not zero-padded).

    Parameters
    ----------
    parameter_dict : dict
         A parameter dictionary generated by one of the import function.
    selected_parameter : list
        List of parameter selected for exportation.
    output_dir : str
        Path to the output directory for the generated dataset.

    Raises
    ------
    TypeError
        One of the arguments does not have the expected type.
    """
    # Validate the argument types, in the order of the signature
    expectations = (
        (parameter_dict, dict, 'The "parameter_dict" must be a dictionary'),
        (selected_parameter, list, 'The "selected_parameter" must be a list'),
        (output_dir, str, 'The "output_dir" must be a string'),
    )
    for argument, expected_type, message in expectations:
        if type(argument) is not expected_type:
            raise TypeError(message, type(argument))

    # Columns of the dataset: timecode first, tag last
    columns = ['TIMECODE', *selected_parameter, 'TAG']

    # On Windows, drop the leading slash of the path
    if platform.system() == 'Windows':
        if output_dir[0] == '/':
            output_dir = output_dir[1:]

    # Write the output file
    destination = os.path.sep.join((output_dir, 'dataset.csv'))
    with open(destination, 'w', newline='') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=columns, dialect='excel')
        writer.writeheader()

        for index, _ in enumerate(parameter_dict['TIMECODE']):
            record = {}
            for column in columns:
                cell = parameter_dict.get(column)[index]
                if type(cell) is datetime:
                    # Clock time followed by the milliseconds part
                    cell = '{}{}'.format(cell.strftime('%H:%M:%S:'), cell.microsecond // 1000)
                record[column] = cell
            writer.writerow(record)


def export_dataset_separated_files(parameter_dict: dict, selected_parameter: list, output_dir: str) -> None:
    """Export the given parameter to a dataset as separated files

    It writes one '<parameter>.csv' file in `output_dir` per entry of `selected_parameter`. Each file has the
    columns 'TIMECODE', '<parameter>' and 'TAG', with one row per entry of ``parameter_dict['TIMECODE']``.
    Values that are `datetime` objects are written as 'HH:MM:SS:' followed by the milliseconds (not zero-padded).

    Parameters
    ----------
    parameter_dict : dict
         A parameter dictionary generated by one of the import function.
    selected_parameter : list
        List of parameter selected for exportation.
    output_dir : str
        Path to the output directory for the generated dataset.

    Raises
    ------
    TypeError
        One of the arguments does not have the expected type, or a column to export ('TAG' or a selected
        parameter) is missing from `parameter_dict`.
    KeyError
        The `parameter_dict` has no 'TIMECODE' entry.
    IndexError
        A column to export holds fewer values than 'TIMECODE'.
    """
    # Test if the given parameters have the right type
    if not isinstance(parameter_dict, dict):
        raise TypeError('The "parameter_dict" must be a dictionary', type(parameter_dict))
    if not isinstance(selected_parameter, list):
        raise TypeError('The "selected_parameter" must be a list', type(selected_parameter))
    if not isinstance(output_dir, str):
        raise TypeError('The "output_dir" must be a string', type(output_dir))

    # Normalize the path for Windows (startswith() is safe on an empty string, indexing is not)
    if platform.system() == 'Windows' and output_dir.startswith('/'):
        output_dir = output_dir[1:]

    # Write the files, one per selected parameter
    for parameter in selected_parameter:
        columns = ['TIMECODE', parameter, 'TAG']
        file_path = os.path.join(output_dir, parameter + '.csv')
        with open(file_path, 'w', newline='') as output_file:
            writer = csv.DictWriter(output_file, fieldnames=columns, dialect='excel')

            writer.writeheader()
            for i in range(len(parameter_dict['TIMECODE'])):
                row = {}
                # Each column must be read from, and stored under, its own name. Reading the selected parameter
                # for all three columns would leave TIMECODE and TAG empty in the file.
                for column in columns:
                    value = parameter_dict.get(column)[i]
                    if type(value) is datetime:
                        value = value.strftime('%H:%M:%S:') + str(int(value.microsecond / 1000))
                    row[column] = value
                writer.writerow(row)