workbench

#!/usr/bin/env python3

# Usage: ./workbench --config config.yml --check
# Usage: ./workbench --config config.yml

import os
import sys
import copy
import json
import csv
import logging
import datetime
import argparse
import collections
import subprocess
import requests_cache
from progress_bar import InitBar
from workbench_utils import *
import workbench_fields
from WorkbenchConfig import WorkbenchConfig


def create():
    """Create new nodes via POST, and add media if there are any.

    For every row returned by get_csv_data() this builds a node structure from
    the CSV values and POSTs it to '/node?_format=json'. If the response code
    is 201 it runs any configured 'node_post_create' scripts, records the node
    ID in the rollback CSV, creates media for the 'file' column and for the
    configured additional-files columns, creates children from directories
    when config['paged_content_from_directories'] is True, and creates a URL
    alias when the row has a non-empty 'url_alias' value. Rows whose POST does
    not return 201 are reported, logged and skipped.

    Takes no parameters and returns nothing. Uses the module-level 'config'
    and 'args' objects, plus 'num_csv_records' and 'pbar' when
    config['progress_bar'] is True. Calls sys.exit() if a configured
    preprocessor command returns a non-zero code.
    """
    message = '"Create" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    path_to_rollback_csv_file = get_rollback_csv_filepath(config)
    prep_rollback_csv(config, path_to_rollback_csv_file)
    logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)

    if config['csv_headers'] == 'labels':
        # Delete any existing cached fieldname map file for this content type.
        fieldname_map_cache_path = os.path.join(config['temp_dir'], f"node-{config['content_type']}-labels.fieldname_map")
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    if config['log_term_creation'] is False:
        logging.info("'log_term_creation' configuration setting is False. Creation of new taxonomy terms will not be logged.")

    if config['secondary_tasks'] is not None:
        if os.path.abspath(args.config) not in json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]):
            prep_parent_node_ids_map(config)

    # Maps CSV ID values to the node IDs of the nodes created in this run.
    node_ids = dict()
    field_definitions = get_field_definitions(config, 'node')
    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames

    # Endpoint passed to issue_request() for every node POST.
    node_endpoint = '/node?_format=json'

    # Field classes keyed by Drupal field type. Any type not listed here
    # (text, integer, boolean etc.) is handled by SimpleField.
    field_handlers = {
        'entity_reference': workbench_fields.EntityReferenceField,
        'typed_relation': workbench_fields.TypedRelationField,
        'geolocation': workbench_fields.GeolocationField,
        'link': workbench_fields.LinkField,
        'authority_link': workbench_fields.AuthorityLinkField,
    }

    # CSV columns that are never turned into Drupal fields in the loop over
    # custom fields: 'parent_id' only expresses parent/child relationships,
    # 'langcode' is a core Drupal field that is not considered a "base field",
    # and the others are reserved CSV fields.
    skipped_custom_fields = ('parent_id', 'langcode', 'image_alt_text', 'url_alias', 'media_use_tid', 'checksum')

    # Optional base fields: (CSV column, node key, key of the value in the node structure).
    optional_base_fields = (
        ('uid', 'uid', 'target_id'),
        ('created', 'created', 'value'),
        ('langcode', 'langcode', 'value'),
        ('published', 'status', 'value'),
    )

    if config['nodes_only'] is True:
        message = '"nodes_only" option in effect. No media will be created.'
        print(message)
        logging.info(message)

    row_count = 0
    for row in csv_data:
        # Create a copy of the current item's row to pass to create_media().
        row_for_media = copy.deepcopy(row)
        if config['paged_content_from_directories'] is True:
            # Create a copy of the current item's row to pass to the
            # create_children_from_directory function.
            row_as_parent = copy.deepcopy(row)

        id_field = row[config['id_field']]

        # Add required fields. 'status' ("published") can be overridden in CSV, below.
        node = {
            'type': [
                {'target_id': config['content_type'],
                 'target_type': 'node_type'}
            ],
            'title': [
                {'value': row['title']}
            ],
            'status': [
                {'value': config['published']}
            ]
        }

        # Some optional base fields.
        for csv_field, node_key, value_key in optional_base_fields:
            if csv_field in csv_column_headers:
                if len(row[csv_field]) > 0:
                    node[node_key] = [{value_key: row[csv_field]}]
                # Reset it to empty so it doesn't throw a key error in the code
                # in the "Assemble Drupal field structures..." section below.
                row[csv_field] = ''

        # If a node with an ID that matches the current item's 'parent_id'
        # value has just been created, make the item a child of the node.
        if 'parent_id' in row.keys() and row['parent_id'] in node_ids:
            row['field_member_of'] = node_ids[row['parent_id']]

        # For children whose parent node was created in the primary task. The ISLANDORA_WORKBENCH_SECONDARY_TASKS
        # environment variable (set by the primary task) contains the names of the config files registered in the
        # primary task's 'secondary_tasks' config option. If the name of the currently running task is in that list
        # (i.e., it's a secondary task), populate its CSV 'field_member_of' with node IDs from the primary task using
        # parent IDs from its 'parent_id' field as the key.
        secondary_task_data = read_parent_node_ids_map(config)
        if os.environ.get('ISLANDORA_WORKBENCH_SECONDARY_TASKS') is not None:
            if os.path.abspath(args.config) in json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]):
                if len(secondary_task_data) > 0:
                    if 'field_member_of' in row and 'parent_id' in row and row['parent_id'] in secondary_task_data.keys():
                        row['field_member_of'] = secondary_task_data[row['parent_id']]
                    else:
                        # If there is no parent ID/nid pair in the secondary_task_data mapping, either because it wasn't
                        # in the primary CSV or it failed to be created in the primary task, skip creating the current
                        # secondary node and move on.
                        logging.warning('Node for row with ID %s in secondary task CSV not created because its parent was not present or not created in the primary task.', id_field)
                        continue

        # Add custom (non-required) CSV fields.
        entity_fields = get_entity_fields(config, 'node', config['content_type'])
        # Only add config['id_field'] to required_fields if it is not a node field.
        required_fields = ['file', 'title']
        if config['id_field'] not in entity_fields:
            required_fields.append(config['id_field'])
        custom_fields = list(set(csv_column_headers) - set(required_fields))
        additional_files_entries = get_additional_files_config(config)

        # Execute field preprocessor scripts, if any are configured. Note that these scripts
        # are applied to the entire value from the CSV field and not split field values,
        # e.g., if a field is multivalued, the preprocessor must split it and then reassemble
        # it back into a string before returning it. Note that preprocessor scripts work only
        # on string data and not on binary data like images, etc. and only on custom fields
        # (so not title). This runs once per row, before the loop over custom fields, so
        # that each command is applied exactly one time to its field's value.
        if 'preprocessors' in config and len(config['preprocessors']) > 0:
            for field, command in config['preprocessors'].items():
                if field in csv_column_headers:
                    output, return_code = preprocess_field_data(config['subdelimiter'], row[field], command)
                    if return_code == 0:
                        preprocessor_input = copy.deepcopy(row[field])
                        row[field] = output.decode().strip()
                        logging.info(
                            'Preprocess command %s executed, taking "%s" as input and returning "%s".',
                            command,
                            preprocessor_input,
                            output.decode().strip())
                    else:
                        message = 'Preprocess command ' + command + ' failed with return code ' + str(return_code)
                        logging.error(message)
                        sys.exit(message)

        for custom_field in custom_fields:
            # Skip processing field if empty.
            if len(row[custom_field].strip()) == 0:
                continue

            # Columns registered as additional files are handled in the media section below.
            if len(additional_files_entries) > 0:
                if custom_field in additional_files_entries.keys():
                    continue

            if custom_field in skipped_custom_fields:
                continue

            # We skip CSV columns whose headers use the 'media:video:field_foo' media track convention.
            if custom_field.startswith('media:'):
                continue

            # Assemble Drupal field structures from CSV data using the class
            # registered for the field's type, falling back to SimpleField.
            field_type = field_definitions[custom_field]['field_type']
            field_handler = field_handlers.get(field_type, workbench_fields.SimpleField)()
            node = field_handler.create(config, field_definitions, node, row, custom_field)

        node_headers = {'Content-Type': 'application/json'}
        node_response = issue_request(config, 'POST', node_endpoint, node_headers, node, None)
        if node_response.status_code != 201:
            message = "Node for CSV record " + id_field + " not created"
            print("ERROR: " + message + '.')
            logging.error(message + f', HTTP response code was {node_response.status_code}, response body was {node_response.content}')
            logging.error('JSON request body used in previous POST to "%s" was %s.', node_endpoint, node)
            continue

        # Everything below this point applies to a successfully created node (HTTP 201).
        node_uri = node_response.headers['location']
        returned_node = json.loads(node_response.text)
        # Always read the node ID from the response body so that it is defined
        # for write_to_parent_node_ids_map() below regardless of the location header.
        node_id = returned_node['nid'][0]['value']

        # If Pathauto URL alias creation for nodes is enabled, the location header
        # returns the alias, not the /node/xxx URL, which includes the node ID. In
        # this case, build the node URL from the node ID in the response body.
        if not re.match(r'/node/\d+$', node_uri):
            node_uri = config['host'] + '/node/' + str(node_id)

        if config['progress_bar'] is False:
            print('Node for "' + row['title'] + '" (record ' + id_field + ') created at ' + node_uri + '.')
        logging.info("Node for \"%s (record %s)\" created at %s.", row['title'], id_field, node_uri)
        if 'output_csv' in config.keys():
            write_to_output_csv(config, id_field, node_response.text, row)

        # Execute node-specific post-create scripts, if any are configured.
        if 'node_post_create' in config and len(config['node_post_create']) > 0:
            for command in config['node_post_create']:
                post_task_output, post_task_return_code = execute_entity_post_task_script(command, args.config, node_response.status_code, node_response.text)
                if post_task_return_code == 0:
                    logging.info("Post node create script " + command + " executed successfully.")
                else:
                    logging.error("Post node create script " + command + " failed.")

        # For primary tasks, map the ID from CSV of newly created node to its node ID so
        # we can use it for linking child nodes, media, etc.
        node_nid = node_uri.rsplit('/', 1)[-1]
        node_ids[id_field] = node_nid
        if config['secondary_tasks'] is not None and len(config['secondary_tasks']) > 0:
            # Only populate the CSV ID->node ID map for items created in the primary task.
            # The absolute path is compared here, as in the other checks against this
            # environment variable in this function.
            if os.path.abspath(args.config) not in json.loads(os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]):
                write_to_parent_node_ids_map(config, id_field, node_id)

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)

        write_rollback_node_id(config, node_nid, path_to_rollback_csv_file)

        # NOTE: an early 'continue' for rows with an empty "file" value used to sit here, but
        # its condition contained "config['allow_missing_files'] is False is True", a chained
        # comparison that is always False, so it never ran. It was removed rather than enabled
        # because skipping the rest of the loop body here would also skip additional files,
        # the URL alias and the rollback config for the row. The "No files specified" branch
        # below reports rows that have no files.

        allowed_media_response_codes = [201, 204]
        if config['nodes_only'] is False and 'file' in row and len(row['file']) != 0:
            media_response_status_code = create_media(config, row['file'], 'file', node_nid, row_for_media)
            if media_response_status_code in allowed_media_response_codes:
                if config['progress_bar'] is False:
                    print("+ Media for " + row['file'] + " created.")
                logging.info("Media for %s created.", row['file'])
            else:
                if config['progress_bar'] is False:
                    print("- ERROR: Media for " + row['file'] + " not created. See log for more information.")
                logging.error("Media for %s not created (HTTP respone code %s).", row['file'], media_response_status_code)

        if config['nodes_only'] is False and 'additional_files' in config:
            additional_files_config = get_additional_files_config(config)
            if len(additional_files_config) > 0:
                for additional_file_field, additional_file_media_use_tid in additional_files_config.items():
                    # If there is no additional media file, move on to the next "additional_files" column.
                    if additional_file_field in row and len(row[additional_file_field].strip()) == 0:
                        if config['progress_bar'] is False:
                            print("- Skipping empty additional_media CSV field '{field}' for {uri}.".format(field=additional_file_field, uri=node_uri))
                        logging.warning("- Skipping empty additional_media CSV field '%s' for %s.", additional_file_field, node_uri)
                        continue
                    filename = row[additional_file_field].strip()
                    file_exists = check_file_exists(config, filename)
                    if file_exists is False:
                        if config['progress_bar'] is False:
                            # The two literals are joined before .format() is called so
                            # that the placeholders in the first one are filled in.
                            print(("- Media for file '{file}' named in field '{field}' of CSV row '{id}' not created. " +
                                   "See log for more information.").format(file=filename, field=additional_file_field, id=row[config['id_field']]))
                        logging.warning('File "%s" from additional_file field "%s" for CSV row "%s" does not exist, cannot create media.', filename, additional_file_field, row[config['id_field']])
                        continue

                    media_response_status_code = create_media(config, row[additional_file_field], additional_file_field, node_nid, row_for_media, additional_file_media_use_tid)
                    if media_response_status_code in allowed_media_response_codes:
                        if config['progress_bar'] is False:
                            print("+ Media for " + row[additional_file_field] + " created.")
                        logging.info("Media for %s created.", row[additional_file_field])
                    else:
                        if config['progress_bar'] is False:
                            print("- Media for " + row[additional_file_field] + " not created. See log for more information.")
                        logging.error("Media for %s not created (HTTP respone code %s).", row[additional_file_field], media_response_status_code)

        if config['nodes_only'] is False and 'file' in row and len(row['file']) == 0 and 'additional_files' not in config and config['paged_content_from_directories'] is False:
            if config['progress_bar'] is False:
                print('+ No files specified in CSV for row ' + str(id_field) + '.')
            logging.info("No files specified for row %s, so no media created.", str(id_field))

        if config['paged_content_from_directories'] is True:
            # Console output and logging are done in the create_children_from_directory() function.
            create_children_from_directory(config, row_as_parent, node_nid)

        # If 'url_alias' is in the CSV, create the alias.
        if 'url_alias' in row and len(row['url_alias']) > 0:
            create_url_alias(config, node_nid, row['url_alias'])

        write_rollback_config(config, path_to_rollback_csv_file)


def update():
    """Update nodes via PATCH. Note that PATCHing replaces the target field,
       so if we are adding an additional value to a multivalued field, we need
       to include the existing value(s) in our PATCH. The field classes take
       care of preserving existing values in 'append' updates.

       For every row returned by get_csv_data() the node named in 'node_id'
       (a node ID or a value resolved with get_nid_from_url_alias()) is pinged;
       rows whose node cannot be pinged are skipped. If the node lacks any
       non-reserved CSV column, the whole row is skipped. Otherwise the node is
       PATCHed, any configured 'node_post_update' scripts are run, and a URL
       alias is created when the row has a non-empty 'url_alias' value.

       Takes no parameters and returns nothing. Uses the module-level 'config'
       and 'args' objects, plus 'num_csv_records' and 'pbar' when
       config['progress_bar'] is True.
    """
    message = '"Update" (' + config['update_mode'] + ') task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    if config['csv_headers'] == 'labels':
        # Delete any existing cached fieldname map file for this content type.
        fieldname_map_cache_path = os.path.join(config['temp_dir'], f"node-{config['content_type']}-labels.fieldname_map")
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    field_definitions = get_field_definitions(config, 'node')
    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames
    invalid_target_ids = []

    if config['log_term_creation'] is False:
        logging.info("'log_term_creation' configuration setting is False. Creation of new taxonomy terms will not be logged.")

    # Field classes keyed by Drupal field type. Any type not listed here
    # (text, etc.) is handled by SimpleField.
    field_handlers = {
        'entity_reference': workbench_fields.EntityReferenceField,
        'typed_relation': workbench_fields.TypedRelationField,
        'geolocation': workbench_fields.GeolocationField,
        'link': workbench_fields.LinkField,
        'authority_link': workbench_fields.AuthorityLinkField,
    }

    # CSV columns that may be absent from the node without causing the row to be skipped.
    reserved_fields = ['published', 'url_alias']

    # CSV columns that are not processed by the field classes: 'url_alias',
    # 'image_alt_text' and 'published' are reserved CSV fields, 'created' and
    # 'uid' are base fields, and 'langcode' is a core Drupal field that is not
    # considered a base field. Issue to add alt text in update task is
    # https://github.com/mjordan/islandora_workbench/issues/166.
    skipped_custom_fields = ('url_alias', 'image_alt_text', 'langcode', 'created', 'published', 'uid')

    # Custom (non-required) fields are all CSV columns other than 'node_id'.
    required_fields = ['node_id']
    custom_fields = list(set(csv_column_headers) - set(required_fields))

    row_count = 0
    for row in csv_data:
        if not value_is_numeric(row['node_id']):
            row['node_id'] = get_nid_from_url_alias(config, row['node_id'])
        # The value returned by get_nid_from_url_alias() may not be a string, so
        # use a string version wherever the node ID is concatenated into text.
        node_id_string = str(row['node_id'])
        node_url = config['host'] + '/node/' + node_id_string

        node_ping_result = ping_node(config, row['node_id'], 'GET', True)
        if node_ping_result is False:
            message = "Node " + node_id_string + " not found or not accessible, skipping update."
            if config['progress_bar'] is False:
                print(message)
            logging.warning(message)
            continue

        # The ping result is parsed as JSON; it is used below to check which
        # fields the node has. Parsed once per row rather than once per field.
        existing_node = json.loads(node_ping_result)

        # Add the target_id field.
        node = {
            'type': [
                {'target_id': config['content_type']}
            ]
        }

        node_field_values = get_node_field_values(config, row['node_id'])

        # Some optional base fields.
        if 'uid' in csv_column_headers:
            if len(row['uid']) > 0:
                node['uid'] = [{'target_id': row['uid']}]

        if 'langcode' in csv_column_headers:
            if len(row['langcode']) > 0:
                node['langcode'] = [{'value': row['langcode']}]

        if 'created' in csv_column_headers:
            if len(row['created']) > 0:
                node['created'] = [{'value': row['created']}]

        if 'published' in csv_column_headers:
            if len(row['published']) > 0:
                node['status'] = [{'value': row['published']}]

        # Add custom (non-required) fields. The flag is initialized before the
        # loop so that it is defined even when the CSV has no custom fields.
        node_has_all_fields = True
        for custom_field in custom_fields:
            # If node doesn't have the field, log that fact and skip updating the node.
            if custom_field not in existing_node and custom_field not in reserved_fields:
                message = f'Node {row["node_id"]} does not have a "{custom_field}" field, skipping update.'
                print('ERROR: ' + message)
                logging.warning(message)
                node_has_all_fields = False
                break

            # Skip updating field if CSV field is empty (other than for 'delete' update mode).
            # For 'delete' update mode it doesn't matter if there's anything in the CSV field,
            # but users expect to be able to supply empty values for this operation.
            if len(row[custom_field].strip()) == 0:
                if config['update_mode'] != 'delete':
                    continue

            if custom_field in skipped_custom_fields:
                continue

            # Update the field using the class registered for its type, falling back to SimpleField.
            field_type = field_definitions[custom_field]['field_type']
            field_handler = field_handlers.get(field_type, workbench_fields.SimpleField)()
            node = field_handler.update(config, field_definitions, node, row, custom_field, node_field_values[custom_field])

        if node_has_all_fields is not True:
            continue

        node_endpoint = node_url + '?_format=json'
        node_headers = {'Content-Type': 'application/json'}
        node_response = issue_request(config, 'PATCH', node_endpoint, node_headers, node)

        if node_response.status_code == 200:
            if config['progress_bar'] is False:
                print("Node " + node_url + " updated.")
            logging.info("Node %s updated.", node_url)

        # Execute node-specific post-update scripts, if any are configured.
        if 'node_post_update' in config and len(config['node_post_update']) > 0:
            for command in config['node_post_update']:
                post_task_output, post_task_return_code = execute_entity_post_task_script(command, args.config, node_response.status_code, node_response.text)
                if post_task_return_code == 0:
                    logging.info("Post node update script " + command + " executed successfully.")
                else:
                    logging.error("Post node update script " + command + " failed.")

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)

        # If 'url_alias' is in the CSV, create the alias.
        if 'url_alias' in row and len(row['url_alias']) > 0:
            create_url_alias(config, row['node_id'], row['url_alias'])

def delete():
    """Delete nodes.

    For every row returned by get_csv_data() the node named in 'node_id'
    (a node ID or a value resolved with get_nid_from_url_alias()) is pinged;
    rows whose node cannot be pinged are logged and skipped. When
    config['delete_media_with_nodes'] is True, the media listed at the node's
    '/media?_format=json' endpoint are removed with remove_media_and_file()
    before the node itself is deleted with a DELETE request.

    Takes no parameters and returns nothing. Uses the module-level 'config'
    and 'args' objects, plus 'num_csv_records' and 'pbar' when
    config['progress_bar'] is True.
    """
    message = '"Delete" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        if not value_is_numeric(row['node_id']):
            row['node_id'] = get_nid_from_url_alias(config, row['node_id'])
        if not ping_node(config, row['node_id']):
            # Build the message unconditionally so the log entry is correct
            # whether or not the progress bar suppresses console output.
            message = f"Node {row['node_id']} not found or not accessible, skipping delete."
            if config['progress_bar'] is False:
                print(message)
            logging.warning(message)
            continue

        node_url = config['host'] + '/node/' + str(row['node_id'])

        # Delete the node's media first. Messages about deleted media are
        # collected here and printed after the node's own message.
        media_messages = []
        if config['delete_media_with_nodes'] is True:
            media_endpoint = node_url + '/media?_format=json'
            media_response = issue_request(config, 'GET', media_endpoint)
            media_response_body = json.loads(media_response.text)
            for media in media_response_body:
                if 'mid' in media:
                    media_id = media['mid'][0]['value']
                    media_delete_status_code = remove_media_and_file(config, media_id)
                    if media_delete_status_code == 204:
                        media_messages.append("+ Media " + config['host'] + '/media/' + str(media_id) + " deleted.")

        node_endpoint = node_url + '?_format=json'
        node_response = issue_request(config, 'DELETE', node_endpoint)
        if node_response.status_code == 204:
            if config['progress_bar'] is False:
                print("Node " + node_url + " deleted.")
            logging.info("Node %s deleted.", node_url)

        # The list is empty unless media deletion is enabled and succeeded for at least one media.
        if config['progress_bar'] is False:
            for media_message in media_messages:
                print(media_message)

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def add_media():
    """Add media to existing nodes.

       For each row in the input CSV, resolves the node ID (URL aliases are
       converted with get_nid_from_url_alias()), confirms the node is reachable,
       and creates media either from the row's "file" column or, when
       "additional_files" is present in the config, from each configured
       additional-file column.

       If "allow_missing_files" is False, a file that cannot be found ends the
       script via sys.exit(). Otherwise a warning is issued and the file is
       skipped. An empty additional-file cell is always skipped with a warning.

       Uses the module-level "config" and "args" and, when the progress bar is
       enabled, the module-level "pbar" and "num_csv_records".
    """
    message = '"Add media" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)
    allowed_media_response_codes = [201, 204]

    row_count = 0
    for row in csv_data:
        if not value_is_numeric(row['node_id']):
            row['node_id'] = get_nid_from_url_alias(config, row['node_id'])
        if not ping_node(config, row['node_id']):
            # str() because the resolved node ID is not guaranteed to be a string.
            print("Node " + str(row['node_id']) + " not found or not accessible, skipping adding media.")
            continue

        node_json_url = config['host'] + '/node/' + str(row['node_id']) + '?_format=json'
        node_uri = config['host'] + '/node/' + str(row['node_id'])
        node_response = issue_request(config, 'HEAD', node_json_url)

        if 'media_use_tid' in row:
            media_use_tid_value = row['media_use_tid']
        else:
            # Get media use TID from config within create_media().
            media_use_tid_value = None

        if node_response.status_code != 200:
            if config['progress_bar'] is False:
                print("ERROR: Node at " + node_uri + " does not exist or is not accessible.")
            logging.error("Node at %s does not exist or is not accessible (HTTP response code %s)", node_uri, node_response.status_code)
        elif 'additional_files' not in config:
            if check_file_exists(config, row['file']):
                media_response_status_code = create_media(config, row['file'], 'file', row['node_id'], row, media_use_tid_value)
                if media_response_status_code in allowed_media_response_codes:
                    if config['progress_bar'] is False:
                        print("Media for " + row['file'] + " created and added to " + node_uri)
                    logging.info("Media for %s created and added to %s.", row['file'], node_uri)
                else:
                    if config['progress_bar'] is False:
                        print("ERROR: Media for " + row['file'] + " not created. See log for more information.")
                    logging.error("Media for %s not created (HTTP response code %s).", row['file'], media_response_status_code)
            elif config['allow_missing_files'] is False:
                # Strict mode: a missing file is fatal.
                message = 'File ' + row['file'] + ' identified in CSV "file" column for node ID ' + str(row['node_id']) + ' not found.'
                logging.error(message)
                sys.exit('Error: ' + message)
            else:
                # Missing files are allowed, so warn and move on to the next row
                # instead of exiting.
                message = "Media for node " + str(row['node_id']) + " not created since the file named in CSV column 'file' is empty or was not found."
                if config['progress_bar'] is False:
                    print("Warning: " + message)
                logging.warning(message)
        else:
            additional_files_config = get_additional_files_config(config)
            if len(additional_files_config) > 0:
                for additional_file_field, additional_file_media_use_tid in additional_files_config.items():
                    additional_file = row[additional_file_field]

                    # An empty cell means this node has no file of this kind.
                    if len(additional_file.strip()) == 0:
                        if config['progress_bar'] is False:
                            print("Warning: Media for " + str(row['node_id']) + " not created since CSV column '" + additional_file_field + "' is empty.")
                        logging.warning("Media for node %s not created since CSV column '%s' is empty", row['node_id'], additional_file_field)
                        continue

                    # Check the additional file itself, not the "file" column.
                    if check_file_exists(config, additional_file) is False:
                        if config['allow_missing_files'] is False:
                            message = 'File ' + additional_file + ' identified in CSV "' + additional_file_field + '" column for node ID ' + str(row['node_id']) + ' not found.'
                            logging.error(message)
                            sys.exit('Error: ' + message)
                        if config['progress_bar'] is False:
                            print('- No media for ' + node_uri + ' created since the file named in its "' + additional_file_field + '" field in the CSV was not found.')
                        logging.warning("No media for %s created since the file named in its '%s' field in the CSV was not found.", node_uri, additional_file_field)
                        continue

                    # Reached in both strict and lenient modes once the file is known to exist.
                    media_response_status_code = create_media(config, additional_file, additional_file_field, row['node_id'], row, additional_file_media_use_tid)
                    if media_response_status_code in allowed_media_response_codes:
                        if config['progress_bar'] is False:
                            print("Media for " + additional_file + " created and added to " + node_uri + ".")
                        logging.info("Media for %s created and added to %s.", additional_file, node_uri)
                    else:
                        if config['progress_bar'] is False:
                            print("ERROR: Media for " + additional_file + " not created. See log for more information.")
                        logging.error("Media for %s not created (HTTP response code %s).", additional_file, media_response_status_code)

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def update_media():
    """Placeholder for a future "update media" task; it currently does nothing.

       See https://github.com/mjordan/islandora_workbench/issues/76 for more info.
    """
    return None


def delete_media():
    """Delete media from media IDs in the input CSV.

       Each row's "media_id" may be a numeric ID or a media URL alias; aliases
       are resolved with get_mid_from_media_url_alias(). A deletion is treated
       as successful when remove_media_and_file() returns 204. Outcomes are
       always logged, and are printed only when the progress bar is off.

       Uses the module-level "config" and "args" and, when the progress bar is
       enabled, the module-level "pbar" and "num_csv_records".
    """
    message = '"Delete media" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        if not value_is_numeric(row['media_id']):
            # Resolve the alias found in the media_id column itself.
            row['media_id'] = get_mid_from_media_url_alias(config, row['media_id'])
        media_delete_status_code = remove_media_and_file(config, row['media_id'])
        media_url = config['host'] + '/media/' + str(row['media_id'])
        if media_delete_status_code == 204:
            message = "Media " + media_url + " and associated file deleted."
            if config['progress_bar'] is False:
                print(message)
            logging.info(message)
        else:
            message = "Media " + media_url + " and associated file not deleted."
            if config['progress_bar'] is False:
                print("ERROR: " + message + " See log for more information.")
            logging.error("%s HTTP response code %s.", message, media_delete_status_code)

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def delete_media_by_node():
    """Delete all media from node IDs in the input CSV.

       Each row's "node_id" may be a numeric ID or a URL alias; aliases are
       resolved with get_nid_from_url_alias(). The media to delete are those
       returned by get_node_media_ids() for the media use term IDs in the
       "delete_media_by_node_media_use_tids" config setting. Outcomes are
       always logged, and are printed only when the progress bar is off.

       Uses the module-level "config" and "args" and, when the progress bar is
       enabled, the module-level "pbar" and "num_csv_records".
    """
    message = '"Deleting media by node" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        if not value_is_numeric(row['node_id']):
            row['node_id'] = get_nid_from_url_alias(config, row['node_id'])

        node_ping_result = ping_node(config, row['node_id'], 'GET', True)
        if node_ping_result is False:
            # Build the message unconditionally so the log entry is always about
            # this node, whether or not it is also printed.
            message = "Node " + str(row['node_id']) + " not found or not accessible, skipping deleting media."
            if config['progress_bar'] is False:
                print(message)
            logging.warning(message)
            continue

        media_ids_from_node = get_node_media_ids(config, row['node_id'], config['delete_media_by_node_media_use_tids'])
        if media_ids_from_node is not False:
            for media_id in media_ids_from_node:
                media_delete_status_code = remove_media_and_file(config, media_id)
                media_description = "Node " + str(row['node_id']) + "'s media " + config['host'] + '/media/' + str(media_id)
                if media_delete_status_code == 204:
                    message = media_description + " and associated files deleted."
                    if config['progress_bar'] is False:
                        print(message)
                    logging.info(message)
                else:
                    message = media_description + " and associated files not deleted."
                    if config['progress_bar'] is False:
                        print("ERROR: " + message + " See log for more information.")
                    logging.error("%s HTTP response code %s.", message, media_delete_status_code)

        if config['progress_bar'] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def create_from_files():
    """Create new nodes from files only (no CSV), and add media. The nodes will
       have a title (derived from filename), and a config-defined Islandora model,
       content type, and status. Media use is derived from config as well.

       Every entry in the "input_dir" directory is processed except files named
       "rollback.*csv". Titles longer than the "max_node_title_length" config
       setting are truncated to that length. The ID of each created node is
       written to the rollback CSV.

       Uses the module-level "config" and "args" and, when the progress bar is
       enabled, the module-level "pbar".
    """
    message = '"Create from files" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    file_dir_path = config['input_dir']
    # Rollback CSVs are not input files; dropping them here also keeps the
    # progress bar total accurate.
    files = [
        file_name for file_name in os.listdir(file_dir_path)
        if not (file_name.startswith('rollback.') and file_name.endswith('csv'))
    ]

    path_to_rollback_csv_file = get_rollback_csv_filepath(config)
    prep_rollback_csv(config, path_to_rollback_csv_file)
    logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)

    # The content type is the same for every file, so look up its fields once.
    entity_fields = get_entity_fields(config, 'node', config['content_type'])
    node_headers = {
        'Content-Type': 'application/json'
    }
    node_endpoint = '/node?_format=json'
    allowed_media_response_codes = [201, 204]

    num_files = len(files)
    file_count = 0
    for file_name in files:
        filename_without_extension = os.path.splitext(file_name)[0]
        if len(filename_without_extension) > config['max_node_title_length']:
            message = 'Truncating the filename "' + filename_without_extension + '" since it exceeds maximum node title length of ' + str(config['max_node_title_length']) + ' characters.'
            logging.error(message)
            # Truncate to the configured limit.
            filename_without_extension = filename_without_extension[:config['max_node_title_length']]

        node_json = {
            'type': [
                {'target_id': config['content_type'],
                 'target_type': 'node_type'}
            ],
            'title': [
                {'value': filename_without_extension}
            ],
            'status': [
                {'value': config['published']}
            ]
        }

        # Add field_model if that field exists in the current content type.
        if 'field_model' in entity_fields:
            islandora_model = set_model_from_extension(file_name, config)
            node_json['field_model'] = [{'target_id': islandora_model, 'target_type': 'taxonomy_term'}]

        node_response = issue_request(
            config,
            'POST',
            node_endpoint,
            node_headers,
            node_json,
            None)
        if node_response.status_code == 201:
            node_uri = node_response.headers['location']
            if config['progress_bar'] is False:
                print('Node for "' + filename_without_extension + '" created at ' + node_uri + '.')
            logging.info(
                'Node for "%s" created at %s.',
                filename_without_extension,
                node_uri)
            if 'output_csv' in config.keys():
                write_to_output_csv(config, '', node_response.text)

            # The node ID is the last path segment of the Location header.
            node_nid = node_uri.rsplit('/', 1)[-1]
            write_rollback_node_id(config, node_nid, path_to_rollback_csv_file)

            # Execute node-specific post-create scripts, if any are configured.
            if 'node_post_create' in config and len(config['node_post_create']) > 0:
                for command in config['node_post_create']:
                    post_task_output, post_task_return_code = execute_entity_post_task_script(command, args.config, node_response.status_code, node_response.text)
                    if post_task_return_code == 0:
                        logging.info("Post node create script " + command + " executed successfully.")
                    else:
                        logging.error("Post node create script " + command + " failed.")

            # create_media() expects a CSV row, so build a minimal stand-in for one.
            file_path = os.path.join(config['input_dir'], file_name)
            fake_csv_record = collections.OrderedDict()
            fake_csv_record['title'] = filename_without_extension
            fake_csv_record['file'] = file_path

            media_type = set_media_type(config, file_path, 'file', fake_csv_record)

            if media_type == 'image':
                fake_csv_record['image_alt_text'] = filename_without_extension
            media_response_status_code = create_media(config, file_name, 'file', node_nid, fake_csv_record)
            if media_response_status_code in allowed_media_response_codes:
                if config['progress_bar'] is False:
                    print("+ Media for " + filename_without_extension + " created.")
                logging.info("Media for %s created.", file_path)
        else:
            logging.error('Node for "%s" not created, HTTP response code was %s.', os.path.join(config['input_dir'], file_name), node_response.status_code)

        if config['progress_bar'] is True:
            file_count += 1
            file_position = get_percentage(file_count, num_files)
            pbar(file_position)

    if config['progress_bar'] is True:
        pbar(100)


def export_csv():
    """Export a CSV file with values, in Islandora Workbench format,
       for each node in the input CSV.

       The output starts with three rows: the column headers, the field
       labels, and the number of values each field allows. One row per
       exported node follows. Nodes that cannot be reached, or whose content
       type differs from the "content_type" config setting, are skipped. If
       "export_file_directory" is configured, download_file_from_drupal() is
       called for each node and its return value goes in a "file" column.

       Returns False (after closing the output file) if fetching a node
       returns a non-200 response; otherwise returns None.

       Uses the module-level "config" and "args" and, when the progress bar is
       enabled, the module-level "pbar" and "num_csv_records".
    """
    message = '"Export CSV" task started using config file ' + args.config + '.'
    if config['export_csv_term_mode'] == 'name':
        message = message + ' The "export_csv_term_mode" configuration option is set to "name", which will slow down the export.'
    print(message)
    logging.info(message)

    remove_column_name = 'REMOVE THIS COLUMN (KEEP THIS ROW)'
    field_definitions = get_field_definitions(config, 'node')

    field_labels = collections.OrderedDict()
    field_names = list(field_definitions.keys())
    # Each insert(0) pushes the earlier entries right, so the resulting order is
    # the remove-column, node_id, title, langcode, uid, created, then the fields.
    for field_name in ['created', 'uid', 'langcode', 'title', 'node_id', remove_column_name]:
        field_names.insert(0, field_name)

    if len(config['export_csv_field_list']) > 0:
        field_names = config['export_csv_field_list']

    # Remove duplicates while keeping first-seen order.
    deduped_field_names = list(dict.fromkeys(field_names))
    # We always include 'node_id' and 'REMOVE THIS COLUMN (KEEP THIS ROW)'. They are
    # checked separately because the labels and cardinality rows always carry
    # the remove-column key, and DictWriter rejects keys that are not headers.
    if 'node_id' not in deduped_field_names:
        deduped_field_names.insert(0, 'node_id')
    if remove_column_name not in deduped_field_names:
        deduped_field_names.insert(0, remove_column_name)

    for field_name in field_definitions:
        if field_name in deduped_field_names:
            if field_definitions[field_name]['label'] != '':
                field_labels[field_name] = field_definitions[field_name]['label']
            else:
                field_labels[field_name] = ''
    field_labels[remove_column_name] = 'LABEL (REMOVE THIS ROW)'

    if config['export_csv_file_path'] is not None:
        csv_file_path = config['export_csv_file_path']
    else:
        csv_file_path = os.path.join(config['input_dir'], config['input_csv'] + '.csv_file_with_field_values')
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)

    if config['export_file_directory'] is not None and 'file' not in deduped_field_names:
        deduped_field_names.append('file')

    cardinality = collections.OrderedDict()
    cardinality[remove_column_name] = 'NUMBER OF VALUES ALLOWED (REMOVE THIS ROW)'
    cardinality['node_id'] = '1'
    cardinality['uid'] = '1'
    cardinality['langcode'] = '1'
    cardinality['created'] = '1'
    cardinality['title'] = '1'
    for field_name in field_definitions:
        if field_definitions[field_name]['cardinality'] == -1:
            cardinality[field_name] = 'unlimited'
        else:
            cardinality[field_name] = field_definitions[field_name]['cardinality']

    cardinality_filtered = collections.OrderedDict()
    for cardinality_key in cardinality.keys():
        if cardinality_key in deduped_field_names:
            cardinality_filtered[cardinality_key] = cardinality[cardinality_key]

    # The context manager closes the file on every exit path, including the
    # early "return False" below.
    with open(csv_file_path, 'a+', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=deduped_field_names, lineterminator="\n")
        writer.writeheader()
        writer.writerow(field_labels)
        writer.writerow(cardinality_filtered)

        csv_data = get_csv_data(config)

        row_count = 0
        for row in csv_data:
            output_row = collections.OrderedDict()
            if not value_is_numeric(row['node_id']):
                row['node_id'] = get_nid_from_url_alias(config, row['node_id'])
            if not ping_node(config, row['node_id']):
                message = "Node " + str(row['node_id']) + " not found or not " + "accessible, skipping export."
                if config['progress_bar'] is False:
                    print(message)
                logging.warning(message)
                continue

            # Get node.
            url = f"{config['host']}/node/{row['node_id']}?_format=json"
            response = issue_request(config, 'GET', url)
            if response.status_code == 200:
                body = json.loads(response.text)
                if body['type'][0]['target_id'] != config['content_type']:
                    message = f"Node {row['node_id']} not written to output CSV because its content type {body['type'][0]['target_id']}" + \
                        f" does not match the \"content_type\" configuration setting."
                    if config['progress_bar'] is False:
                        print("Error: " + message)
                    logging.error(message)
                    continue

                for fieldname_to_serialize in deduped_field_names:
                    if fieldname_to_serialize in body and fieldname_to_serialize in field_definitions:
                        # Kept in its own name so the input rows in csv_data are not shadowed.
                        serialized_value = serialize_field_json(config, field_definitions, fieldname_to_serialize, body[fieldname_to_serialize])
                        output_row[fieldname_to_serialize] = serialized_value

                if config['export_file_directory'] is not None:
                    downloaded_file_name = download_file_from_drupal(config, row['node_id'])
                    output_row['file'] = downloaded_file_name
            else:
                message = f"Attempt to get node {row['node_id']} returned a {response.status_code} status code."
                print("  Error: " + message)
                logging.warning(message)
                return False

            output_row['node_id'] = row['node_id']
            writer.writerow(output_row)

            if config['export_file_directory'] is not None:
                and_files = f"and file "
            else:
                and_files = ''
            message = f"Exporting data {and_files}for node {row['node_id']} \"{body['title'][0]['value']}\"."

            if config['progress_bar'] is True:
                row_count += 1
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)
            else:
                print(message)

            logging.info(message)

    if config['progress_bar'] is True:
        pbar(100)
    else:
        print('CSV export saved at ' + csv_file_path + '.')


def _write_view_nodes_to_csv(nodes, seen_nids, field_names, field_definitions, writer):
    """Write one page of View results to the output CSV.

       "nodes" is the decoded JSON list from the View, "seen_nids" is a set of
       node IDs already written (updated in place), "field_names" lists the
       output columns, and "writer" is the csv.DictWriter for the output file.
       Nodes already in "seen_nids" are skipped silently. Nodes whose content
       type differs from the "content_type" config setting are skipped with a
       warning.
    """
    for node in nodes:
        nid = node['nid'][0]['value']
        if nid in seen_nids:
            continue

        if node['type'][0]['target_id'] != config['content_type']:
            message = f"Node {nid} not written to output CSV because its content type (" + \
                f"{node['type'][0]['target_id']}) does not match the \"content_type\" configuration setting."
            print("Warning: " + message)
            logging.warning(message)
            continue

        seen_nids.add(nid)
        row = dict()
        row['node_id'] = nid
        row['title'] = node['title'][0]['value']

        if config['export_file_directory'] is not None:
            and_files = "and file "
        else:
            and_files = ''
        message = f"Exporting data {and_files}for node {row['node_id']} \"{row['title']}\"."
        print(message)
        logging.info(message)

        for field_name in field_names:
            if field_name.startswith('field_') and field_name in node:
                row[field_name] = serialize_field_json(config, field_definitions, field_name, node[field_name])

        if config['export_file_directory'] is not None:
            row['file'] = download_file_from_drupal(config, row['node_id'])

        writer.writerow(row)


def get_data_from_view():
    """Retrieve data from a Drupal View via its REST export display.

       Pages through the View starting at page 0 until an empty page is
       returned, writing one CSV row per unique node of the configured
       content type. Exits the script if the View cannot be reached or if the
       first page returns a non-200 response. A non-200 response on a later
       page is logged and that page is skipped; paging stops after several
       consecutive failed pages so a persistent failure cannot loop forever.

       Note: We won't be able to use the progress_bar option in this task until
       https://www.drupal.org/project/drupal/issues/2982729 is resolved, since
       we have no way of knowing how many items are in the View output til then.
    """
    message = '"Get data from View" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    view_parameters = '&'.join(config['view_parameters']) if 'view_parameters' in config else ''
    view_url = config['host'] + '/' + config['view_path'].lstrip('/') + '?page=0&' + view_parameters
    view_path_status_code = ping_view_endpoint(config, view_url)
    if view_path_status_code != 200:
        message = f"Cannot access View at {view_url}."
        logging.error(message + " HTTP status code is " + str(view_path_status_code) + ".")
        sys.exit("Error: " + message + " See log for more information.")

    if config['export_csv_file_path'] is not None:
        csv_file_path = config['export_csv_file_path']
    else:
        csv_file_path = os.path.join(config['input_dir'], os.path.basename(args.config).split('.')[0] + '.csv_file_with_data_from_view')
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)

    # Probe whether the configured CSV path is writable by creating and removing
    # a directory there. The extra None check avoids os.path.exists(None).
    # NOTE(review): this probe is gated on "export_file_directory" but tests
    # "export_csv_file_path" - confirm which setting it is meant to validate.
    if config['export_file_directory'] is not None and config['export_csv_file_path'] is not None:
        if not os.path.exists(config['export_csv_file_path']):
            try:
                os.mkdir(config['export_csv_file_path'])
                os.rmdir(config['export_csv_file_path'])
            except Exception as e:
                message = 'Path in configuration option "export_csv_file_path" ("' + config['export_csv_file_path'] + '") is not writable.'
                logging.error(message + ' ' + str(e))
                sys.exit('Error: ' + message + ' See log for more detail.')

    field_definitions = get_field_definitions(config, 'node')

    if len(config['export_csv_field_list']) > 0:
        # Work on a copy so the configured list itself is not modified.
        field_names = list(config['export_csv_field_list'])
    else:
        field_names = []
        for field_name in field_definitions.keys():
            if field_name.startswith('field_'):
                field_names.insert(0, field_name)
    # Always include node_id and title.
    field_names.insert(0, 'title')
    field_names.insert(0, 'node_id')

    # Remove duplicates while keeping first-seen order.
    deduped_field_names = list(dict.fromkeys(field_names))

    if config['export_file_directory'] is not None and 'file' not in deduped_field_names:
        deduped_field_names.append('file')

    seen_nids = set()
    view_url = config['host'] + '/' + config['view_path'].lstrip('/') + '?page='
    max_consecutive_failed_pages = 3

    with open(csv_file_path, 'a+', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=deduped_field_names, lineterminator="\n")
        writer.writeheader()

        page = 0
        consecutive_failed_pages = 0
        # Loop through the pages until we encounter an empty one.
        while True:
            url = view_url + str(page) + '&' + view_parameters
            response = issue_request(config, 'GET', url)
            if response.status_code != 200:
                if page == 0:
                    message = f"Request to View at {url} returned a non-200 status ({response.status_code})."
                    logging.error(message)
                    sys.exit("Error: " + message)

                message = f"Request to View at {url} returned a non-200 status ({response.status_code}); page {page} of results not written to the output CSV file."
                logging.error(message)
                consecutive_failed_pages += 1
                if consecutive_failed_pages >= max_consecutive_failed_pages:
                    logging.error("Stopped requesting pages from the View after %s consecutive failed requests.", consecutive_failed_pages)
                    break
                page += 1
                continue

            consecutive_failed_pages = 0
            nodes = json.loads(response.text)
            if len(nodes) == 0:
                break
            _write_view_nodes_to_csv(nodes, seen_nids, deduped_field_names, field_definitions, writer)
            page += 1

    message = "CSV file is available at " + csv_file_path + '.'
    logging.info(message)
    print(message)


def _create_term_if_missing(term_row):
    """Create the term described by one CSV row unless the vocabulary already has it.

       Looks the term up with find_term_in_vocab() and, if it is absent,
       creates it with create_term(). Results are printed only when the
       progress bar is off; the "already exists" case is also logged.
    """
    term_name = term_row['term_name']
    if find_term_in_vocab(config, config['vocab_id'], term_name) is False:
        term_id = create_term(config, config['vocab_id'], term_name, term_row)
        # Successful creation, and failure, is logged in create_term().
        if config['progress_bar'] is not True:
            if term_id is not False:
                print('Term "' + term_name + '" created.')
            else:
                print('Error: Term "' + term_name + '" not created. See log for more information.')
    else:
        message = 'Term "' + term_name + '" already exists in the "' + config['vocab_id'] + '" vocabulary, skipping.'
        if config['progress_bar'] is not True:
            print(message)
        logging.info(message)


def create_terms():
    """Create new terms via POST.

       Terms named in any row's "parent" column are created first so they
       exist before their children; every other row is created in a second
       pass. Terms already in the vocabulary are skipped. Each CSV row counts
       exactly once toward the progress bar.

       Uses the module-level "config" and "args".
    """
    message = '"Create terms" task started using config file ' + args.config + '.'
    print(message)
    logging.info(message)

    if config['csv_headers'] == 'labels':
        fieldname_map_cache_path = os.path.join(config['temp_dir'], f"taxonomy_term-{config['vocab_id']}-labels.fieldname_map")
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    if config['progress_bar'] is True:
        csv_data_to_count = list(get_csv_data(config))
        num_csv_records = len(csv_data_to_count)
        pbar = InitBar()

    # These should be set in WorkbenchConfig.get_config() but aren't
    # taking effect there. @todo: address later.
    config['allow_adding_terms'] = True
    config['id_field'] = 'term_name'

    # This is the CSV data for creating non-hierarchical or child terms.
    csv_data = get_csv_data(config)

    # We also need copies of the vocabulary CSV data for the parent checks.
    is_parent_check_csv_data = get_csv_data(config)
    parent_csv_data = get_csv_data(config)

    # First check for any terms that are designated as parents (i.e., they
    # are in the 'parent' column in at least one row).
    parent_term_names = set()
    for is_parent_check_row in is_parent_check_csv_data:
        if 'parent' in is_parent_check_row and len(str(is_parent_check_row['parent']).strip()) > 0:
            parent_term_names.add(is_parent_check_row['parent'])

    term_row_count = 0
    # Then, get the CSV rows for each of the parent terms and create the terms
    # so their IDs are available to the child terms.
    if len(parent_term_names) > 0:
        for parent_row in parent_csv_data:
            # Non-parent rows are handled, and counted, in the second pass.
            if parent_row['term_name'] not in parent_term_names:
                continue
            term_row_count += 1
            _create_term_if_missing(parent_row)

            if config['progress_bar'] is True:
                term_row_position = get_percentage(term_row_count, num_csv_records)
                pbar(term_row_position)

    # Now that we have created all terms that are parents, null out these copies
    # of the CSV data, no need to keep them around.
    parent_csv_data = None
    is_parent_check_csv_data = None

    # Finally, create any non-existent child terms.
    for row in csv_data:
        # If it's a parent term, it will have been created above.
        if row['term_name'] in parent_term_names:
            continue
        term_row_count += 1
        _create_term_if_missing(row)

        if config['progress_bar'] is True:
            term_row_position = get_percentage(term_row_count, num_csv_records)
            pbar(term_row_position)

    if config['progress_bar'] is True:
        pbar(100)


def update_terms():
    """Not implemented yet; calling this function does nothing.

    See https://github.com/mjordan/islandora_workbench/issues/469 for more info.
    """
    return None


# Main program logic.

# Define the command-line interface. Only --config is mandatory; --check,
# --get_csv_template and --contactsheet are boolean flags, and the two
# --quick_delete_* options each take a URL as their value.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--config',
    required=True,
    help='Configuration file to use.')
parser.add_argument(
    '--check',
    action='store_true',
    help='Check input data and exit without creating/updating/etc.')
parser.add_argument(
    '--get_csv_template',
    action='store_true',
    help='Generate a CSV template using the specified configuration file.')
parser.add_argument(
    '--quick_delete_node',
    help='Delete the node (and all attached media) identified by the URL.')
parser.add_argument(
    '--quick_delete_media',
    help='Delete the media (and attached file) identified by the URL.')
parser.add_argument(
    '--contactsheet',
    action='store_true',
    help='Generate a contact sheet.')
args = parser.parse_args()

# The parsed arguments are handed to WorkbenchConfig, which supplies the
# 'config' mapping used by the rest of this script.
workbench_config = WorkbenchConfig(args)
config = workbench_config.get_config()

create_temp_dir(config)

# When secondary tasks are configured, publish two environment variables: a
# JSON list of the absolute paths of the secondary tasks' configuration files,
# and the absolute path of this (primary) task's temp directory.
if config['secondary_tasks'] is not None and len(config['secondary_tasks']) > 0:
    secondary_tasks = [os.path.abspath(config_path) for config_path in config['secondary_tasks']]
    os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"] = json.dumps(secondary_tasks)
    # The list above is passed through the environment rather than cached in temp_dir
    # because the secondary tasks may not use the same temp_dir as the primary task.
    os.environ["ISLANDORA_WORKBENCH_PRIMARY_TASK_TEMP_DIR"] = os.path.abspath(config['temp_dir'])

# Detach every handler currently attached to the root logger (iterating over a
# copy, since the list is modified while looping), then configure logging to
# write to the log file named in the configuration.
for existing_handler in list(logging.root.handlers):
    logging.root.removeHandler(existing_handler)

logging.basicConfig(
    filename=config['log_file_path'],
    filemode=config['log_file_mode'],
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%d-%b-%y %H:%M:%S',
)

# When config['check'] is False and the task is not one of those listed in
# tasks_to_skip, count the rows in the input CSV and exit with a warning if
# there are none.
if 'check' in config:
    tasks_to_skip = ['create_from_files', 'get_data_from_view']
    should_count_rows = config['check'] is False and config['task'] not in tasks_to_skip
    if should_count_rows:
        num_csv_records = len(list(get_csv_data(config)))
        if num_csv_records == 0:
            zero_data_rows_message = f"Input CSV \"{config['input_csv']}\" contains 0 data rows, exiting."
            logging.warning(zero_data_rows_message)
            sys.exit("WARNING: " + zero_data_rows_message)

# Run each configured bootstrap script (only when config['check'] is False),
# logging whether it succeeded or failed based on its return code.
if config['check'] is False and 'bootstrap' in config and len(config['bootstrap']) > 0:
    for command in config['bootstrap']:
        print("Executing bootstrap script " + command)
        output, return_code = execute_bootstrap_script(command, args.config)
        if return_code != 0:
            logging.error(f"Bootstrap script {command} failed with exit code {str(return_code)}.")
            continue
        logging.info(f"Bootstrap script {command} executed successfully.")

# For every task except 'create_from_files', an 'input_csv' value that starts
# with 'http' is handed to get_csv_from_google_sheet(), and one that ends with
# '.xlsx' is handed to get_csv_from_excel().
if config['task'] != 'create_from_files':
    if config['input_csv'].startswith('http'):
        get_csv_from_google_sheet(config)
    if config['input_csv'].endswith('.xlsx'):
        get_csv_from_excel(config)

validate_input_dir(config)

ping_islandora(config, print_message=True)
check_integration_module_version(config)

# Install an in-memory requests cache if the configuration asks for one, then
# log whether client-side request caching ended up enabled.
if config['enable_http_cache'] is True:
    requests_cache.install_cache(backend='memory')

cache_enabled = requests_cache.patcher.is_installed()
message = (
    "Client-side request caching is enabled."
    if cache_enabled
    else "Client-side request caching is not enabled."
)
logging.info(message)

# The Drupal core version check is skipped when 'nodes_only' is not False.
if config['nodes_only'] is False:
    check_drupal_core_version(config)

csv_subset_warning(config)

# A DictReader has no built-in way to report how many rows it contains, so the
# CSV data is read into a list and the list's length is used as the row count.
# No row count is taken for the 'create_from_files' task.
if config['progress_bar'] is True:
    if config['task'] != 'create_from_files':
        num_csv_records = len(list(get_csv_data(config)))
    pbar = InitBar()

# Workbench exits at the end of each of the three functions called below, so
# when any one of them runs, no code after its call is executed.
if 'get_csv_template' in config and config['get_csv_template']:
    get_csv_template(config, args)

if args.quick_delete_node is not None:
    quick_delete_node(config, args)

if args.quick_delete_media is not None:
    quick_delete_media(config, args)

try:
    # Validate the input only when 'check' is set in the configuration. The
    # 'create_from_files' task has its own validation function.
    if 'check' in config and config['check']:
        if config['task'] == 'create_from_files':
            run_input_check = check_input_for_create_from_files
        else:
            run_input_check = check_input
        run_input_check(config, args)
except KeyboardInterrupt:
    print('Exiting before entire --check completed.')
    logging.warning('Workbench exiting after receiving "ctrl-c" during --check.')
    # sys.exit() raises SystemExit; it is caught here so that the process is
    # terminated with os._exit() instead.
    try:
        sys.exit(0)
    except SystemExit:
        os._exit(0)

try:
    # Map each task name to the function that implements it. A task name that
    # is not listed here runs no primary task function.
    task_functions = {
        'create': create,
        'update': update,
        'delete': delete,
        'add_media': add_media,
        'delete_media': delete_media,
        'delete_media_by_node': delete_media_by_node,
        'create_from_files': create_from_files,
        'export_csv': export_csv,
        'get_data_from_view': get_data_from_view,
        'create_terms': create_terms,
    }
    task_function = task_functions.get(config['task'])
    if task_function is not None:
        task_function()

    # Run each secondary task as a separate Workbench process, one after the other.
    if config['secondary_tasks'] is not None and len(config['secondary_tasks']) > 0:
        for secondary_config_file in config['secondary_tasks']:
            message = 'Executing secondary task using configuration file ' + secondary_config_file + '.'
            print('')
            print(message)
            logging.info(message)
            cmd = [config['path_to_python'], config['path_to_workbench_script'], "--config", secondary_config_file]
            completed_process = subprocess.run(cmd)
            # Log a failed secondary task in the same way that failed
            # bootstrap and shutdown scripts are logged.
            if completed_process.returncode != 0:
                logging.error(
                    f"Secondary task using configuration file {secondary_config_file} "
                    f"failed with exit code {completed_process.returncode}.")

    # Execute shutdown scripts, if any are configured (only when config['check'] is False).
    if config['check'] is False and 'shutdown' in config and len(config['shutdown']) > 0:
        for command in config['shutdown']:
            print("Executing shutdown script " + command)
            output, return_code = execute_shutdown_script(command, args.config)
            if return_code == 0:
                logging.info(f"Shutdown script {command} executed successfully.")
            else:
                logging.error(f"Shutdown script {command} failed with exit code {str(return_code)}.")

    logging.info("Islandora Workbench successfully completed.")

    # Remove the environment variables used to coordinate with secondary
    # tasks, if they are set.
    os.environ.pop('ISLANDORA_WORKBENCH_PRIMARY_TASK_TEMP_DIR', None)
    os.environ.pop('ISLANDORA_WORKBENCH_SECONDARY_TASKS', None)


except KeyboardInterrupt:
    print('Exiting before entire CSV processed. See log for more info.')
    logging.warning('Workbench exiting after receiving "ctrl-c". Consult the documentation to learn how to resume your batch.')
    # sys.exit() raises SystemExit; it is caught here so that the process is
    # terminated with os._exit() instead.
    try:
        sys.exit(0)
    except SystemExit:
        os._exit(0)