From 063719574697bd9c6793db61d2eb48066d6de284 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Thu, 5 Sep 2024 15:28:29 -0700 Subject: [PATCH 1/9] See ticket#120 --- docker/load_mongodump.sh | 28 ++++- utils/db_utils.py | 257 ++++++++++++++++++++++++--------------- 2 files changed, 181 insertions(+), 104 deletions(-) diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index bd6fcea..9039b59 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -1,10 +1,28 @@ +#!/bin/bash + +# Check if the correct number of arguments is provided +if [ "$#" -ne 3 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Assign arguments to variables MONGODUMP_FILE=$1 +DATABASE_NAME=$2 +DOCKER_CONTAINER_NAME=$3 -echo "Copying file to docker container" -docker cp $MONGODUMP_FILE op-admin-dashboard-db-1:/tmp +# Extract the base name of the dump file for use in the restore command +FILE_NAME=$(basename $MONGODUMP_FILE) -FILE_NAME=`basename $MONGODUMP_FILE` +# Drop the existing database to ensure it’s clean before restoring the new dump +echo "Dropping the existing database $DATABASE_NAME" +docker exec $DOCKER_CONTAINER_NAME mongo $DATABASE_NAME --eval "db.dropDatabase()" -echo "Restoring the dump from $FILE_NAME" -docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore' +# Copy the MongoDB dump file from the local machine to the Docker container +echo "Copying file to Docker container $DOCKER_CONTAINER_NAME" +docker cp $MONGODUMP_FILE $DOCKER_CONTAINER_NAME:/tmp +# Restore the dump into the specified database +echo "Restoring the dump from $FILE_NAME to database $DATABASE_NAME" +docker exec -e MONGODUMP_FILE=$FILE_NAME $DOCKER_CONTAINER_NAME bash -c \ + 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore -d '"$DATABASE_NAME"' dump/openpath_prod_ca_ebike' diff --git a/utils/db_utils.py b/utils/db_utils.py index 555ed5d..5aad065 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -17,16 +17,19 @@ from utils import permissions as perm_utils from utils.datetime_utils import iso_range_to_ts_range + def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = []): """ Returns a dictionary of df records, given a dataframe, a column to filter on, and a list of values that rows in that column will be excluded if they match """ - if df.empty: return [] - if col_to_filter and vals_to_exclude: # will only filter if both are not None or [] + if df.empty: + return [] + if col_to_filter and vals_to_exclude: # will only filter if both are not None or [] df = df[~df[col_to_filter].isin(vals_to_exclude)] return df.to_dict("records") + def query_uuids(start_date: str, end_date: str, tz: str): # As of now, time filtering does not apply to UUIDs; we just query all of them. 
# Vestigial code commented out and left below for future reference @@ -58,18 +61,20 @@ def query_uuids(start_date: str, end_date: str, tz: str): entries = edb.get_uuid_db().find() df = pd.json_normalize(list(entries)) if not df.empty: - df['update_ts'] = pd.to_datetime(df['update_ts']) - df['user_id'] = df['uuid'].apply(str) - df['user_token'] = df['user_email'] + df["update_ts"] = pd.to_datetime(df["update_ts"]) + df["user_id"] = df["uuid"].apply(str) + df["user_token"] = df["user_email"] df.drop(columns=["uuid", "_id"], inplace=True) return df + def query_confirmed_trips(start_date: str, end_date: str, tz: str): (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic - df = ts.get_data_df("analysis/confirmed_trip", + df = ts.get_data_df( + "analysis/confirmed_trip", time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts), ) user_input_cols = [] @@ -82,36 +87,64 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # for backwards compatibility. We do this for all columns since columns which don't exist are ignored by the rename command. rename_cols = constants.VALID_TRIP_COLS # the mapping is `{distance: data.distance, duration: data.duration} etc - rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols)) + rename_mapping = dict( + zip([c.replace("data.", "") for c in rename_cols], rename_cols) + ) logging.debug("Rename mapping is %s" % rename_mapping) df.rename(columns=rename_mapping, inplace=True) logging.debug("After renaming columns, they are %s" % df.columns) # Now copy over the coordinates - df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"]) - df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"]) - - # Add primary modes from the sensed, inferred and ble summaries. Note that we do this - # **before** filtering the `all_trip_columns` because the - # *_section_summary columns are not currently valid - get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID" - df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary) - df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary) - if 'ble_sensed_summary' in df.columns: - df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary) + df["data.start_loc.coordinates"] = df["start_loc"].apply( + lambda g: g["coordinates"] + ) + df["data.end_loc.coordinates"] = df["end_loc"].apply(lambda g: g["coordinates"]) + + # Check if 'md' is not a dictionary or does not contain the key 'distance' + # or if 'md["distance"]' is not a dictionary. + # If any of these conditions are true, return "INVALID". + get_max_mode_from_summary = lambda md: ( + "INVALID" + if not isinstance(md, dict) + or "distance" not in md + or not isinstance(md["distance"], dict) + # If 'md' is a dictionary and 'distance' is a valid key pointing to a dictionary: + else ( + # Get the maximum value from 'md["distance"]' using the values of 'md["distance"].get' as the key for 'max'. + # This operation only happens if the length of 'md["distance"]' is greater than 0. + # Otherwise, return "INVALID". 
+ max(md["distance"], key=md["distance"].get) + if len(md["distance"]) > 0 + else "INVALID" + ) + ) + + df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply( + get_max_mode_from_summary + ) + df["data.primary_predicted_mode"] = df.inferred_section_summary.apply( + get_max_mode_from_summary + ) + if "ble_sensed_summary" in df.columns: + df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply( + get_max_mode_from_summary + ) else: logging.debug("No BLE support found, not fleet version, ignoring...") # Expand the user inputs user_input_df = pd.json_normalize(df.user_input) - df = pd.concat([df, user_input_df], axis='columns') + df = pd.concat([df, user_input_df], axis="columns") logging.debug(f"Before filtering {user_input_df.columns=}") - user_input_cols = [c for c in user_input_df.columns - if "metadata" not in c and - "xmlns" not in c and - "local_dt" not in c and - 'xmlResponse' not in c and - "_id" not in c] + user_input_cols = [ + c + for c in user_input_df.columns + if "metadata" not in c + and "xmlns" not in c + and "local_dt" not in c + and "xmlResponse" not in c + and "_id" not in c + ] logging.debug(f"After filtering {user_input_cols=}") combined_col_list = list(perm_utils.get_all_trip_columns()) + user_input_cols @@ -124,8 +157,8 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): if col in df.columns: df[col] = df[col].apply(str) for named_col in perm_utils.get_all_named_trip_columns(): - if named_col['path'] in df.columns: - df[named_col['label']] = df[named_col['path']] + if named_col["path"] in df.columns: + df[named_col["label"]] = df[named_col["path"]] # df = df.drop(columns=[named_col['path']]) # TODO: We should really display both the humanized value and the raw value # humanized value for people to see the entries in real time @@ -134,24 +167,28 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530105040 # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530439811 # so just replacing the distance and duration with the humanized values for now - df['data.distance_meters'] = df['data.distance'] - use_imperial = perm_utils.config.get("display_config", - {"use_imperial": False}).get("use_imperial", False) + df["data.distance_meters"] = df["data.distance"] + use_imperial = perm_utils.config.get( + "display_config", {"use_imperial": False} + ).get("use_imperial", False) # convert to km to humanize - df['data.distance_km'] = df['data.distance'] / 1000 + df["data.distance_km"] = df["data.distance"] / 1000 # convert km further to miles because this is the US, Liberia or Myanmar # https://en.wikipedia.org/wiki/Mile - df['data.duration_seconds'] = df['data.duration'] + df["data.duration_seconds"] = df["data.duration"] if use_imperial: - df['data.distance_miles'] = df['data.distance_km'] * 0.6213712 + df["data.distance_miles"] = df["data.distance_km"] * 0.6213712 - df['data.duration'] = df['data.duration'].apply(lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True)) + df["data.duration"] = df["data.duration"].apply( + lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True) + ) # logging.debug("After filtering, df columns are %s" % df.columns) # logging.debug("After filtering, the actual data is %s" % df.head()) # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) return (df, user_input_cols) + def query_demographics(): # Returns dictionary of df where key 
represent differnt survey id and values are df for each survey logging.debug("Querying the demographics for (no date range)") @@ -162,7 +199,7 @@ def query_demographics(): available_key = {} for entry in data: - survey_key = list(entry['data']['jsonDocResponse'].keys())[0] + survey_key = list(entry["data"]["jsonDocResponse"].keys())[0] if survey_key not in available_key: available_key[survey_key] = [] available_key[survey_key].append(entry) @@ -176,20 +213,28 @@ def query_demographics(): if not df.empty: for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: - df[col] = df[col].apply(str) + df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns= columns_to_drop, inplace=True) - modified_columns = perm_utils.get_demographic_columns(df.columns) - df.columns = modified_columns - df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + df.drop(columns=columns_to_drop, inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns + df.columns = [ + ( + col.rsplit(".", 1)[-1] + if col.startswith("data.jsonDocResponse.") + else col + ) + for col in df.columns + ] for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: if col in df.columns: - df.drop(columns= [col], inplace=True) - + df.drop(columns=[col], inplace=True) + return dataframes + def query_trajectories(start_date: str, end_date: str, tz: str): - + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( @@ -199,117 +244,131 @@ def query_trajectories(start_date: str, end_date: str, tz: str): df = pd.json_normalize(list(entries)) if not df.empty: for col in df.columns: - if df[col].dtype == 'object': + if df[col].dtype == "object": df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns= columns_to_drop, inplace=True) + df.drop(columns=columns_to_drop, inplace=True) for col in constants.EXCLUDED_TRAJECTORIES_COLS: if col in df.columns: - df.drop(columns= [col], inplace=True) - df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') + df.drop(columns=[col], inplace=True) + df["data.mode_str"] = df["data.mode"].apply( + lambda x: ( + ecwm.MotionTypes(x).name + if x in set(enum.value for enum in ecwm.MotionTypes) + else "UNKNOWN" + ) + ) return df def add_user_stats(user_data): for user in user_data: - user_uuid = UUID(user['user_id']) + user_uuid = UUID(user["user_id"]) total_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}] + extra_query_list=[{"user_id": user_uuid}], ) - user['total_trips'] = total_trips + user["total_trips"] = total_trips labeled_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] + extra_query_list=[{"user_id": user_uuid}, {"data.user_input": {"$ne": {}}}], ) - user['labeled_trips'] = labeled_trips - - profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) - user['platform'] = profile_data.get('curr_platform') - user['manufacturer'] = profile_data.get('manufacturer') - user['app_version'] = profile_data.get('client_app_version') - user['os_version'] 
= profile_data.get('client_os_version') - user['phone_lang'] = profile_data.get('phone_lang') - - + user["labeled_trips"] = labeled_trips + profile_data = edb.get_profile_db().find_one({"user_id": user_uuid}) + user["platform"] = profile_data.get("curr_platform") + user["manufacturer"] = profile_data.get("manufacturer") + user["app_version"] = profile_data.get("client_app_version") + user["os_version"] = profile_data.get("client_os_version") + user["phone_lang"] = profile_data.get("phone_lang") if total_trips > 0: - time_format = 'YYYY-MM-DD HH:mm:ss' + time_format = "YYYY-MM-DD HH:mm:ss" ts = esta.TimeSeries.get_time_series(user_uuid) start_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.ASCENDING + key="analysis/confirmed_trip", + field="data.end_ts", + sort_order=pymongo.ASCENDING, ) if start_ts != -1: - user['first_trip'] = arrow.get(start_ts).format(time_format) + user["first_trip"] = arrow.get(start_ts).format(time_format) end_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.DESCENDING + key="analysis/confirmed_trip", + field="data.end_ts", + sort_order=pymongo.DESCENDING, ) if end_ts != -1: - user['last_trip'] = arrow.get(end_ts).format(time_format) + user["last_trip"] = arrow.get(end_ts).format(time_format) last_call = ts.get_first_value_for_field( - key='stats/server_api_time', - field='data.ts', - sort_order=pymongo.DESCENDING + key="stats/server_api_time", + field="data.ts", + sort_order=pymongo.DESCENDING, ) if last_call != -1: - user['last_call'] = arrow.get(last_call).format(time_format) + user["last_call"] = arrow.get(last_call).format(time_format) return user_data -def query_segments_crossing_endpoints(poly_region_start, poly_region_end, start_date: str, end_date: str, tz: str, excluded_uuids: list[str]): + +def query_segments_crossing_endpoints( + poly_region_start, + poly_region_end, + start_date: str, + end_date: str, + tz: str, + excluded_uuids: list[str], +): (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) tq = estt.TimeQuery("data.ts", start_ts, end_ts) - not_excluded_uuid_query = {'user_id': {'$nin': [UUID(uuid) for uuid in excluded_uuids]}} + not_excluded_uuid_query = { + "user_id": {"$nin": [UUID(uuid) for uuid in excluded_uuids]} + } agg_ts = estag.AggregateTimeSeries().get_aggregate_time_series() locs_matching_start = agg_ts.get_data_df( - "analysis/recreated_location", - geo_query = estg.GeoQuery(['data.loc'], poly_region_start), - time_query = tq, - extra_query_list=[not_excluded_uuid_query] - ) - locs_matching_start = locs_matching_start.drop_duplicates(subset=['section']) + "analysis/recreated_location", + geo_query=estg.GeoQuery(["data.loc"], poly_region_start), + time_query=tq, + extra_query_list=[not_excluded_uuid_query], + ) + locs_matching_start = locs_matching_start.drop_duplicates(subset=["section"]) if locs_matching_start.empty: return locs_matching_start - + locs_matching_end = agg_ts.get_data_df( - "analysis/recreated_location", - geo_query = estg.GeoQuery(['data.loc'], poly_region_end), - time_query = tq, - extra_query_list=[not_excluded_uuid_query] - ) - locs_matching_end = locs_matching_end.drop_duplicates(subset=['section']) + "analysis/recreated_location", + geo_query=estg.GeoQuery(["data.loc"], poly_region_end), + time_query=tq, + extra_query_list=[not_excluded_uuid_query], + ) + locs_matching_end = locs_matching_end.drop_duplicates(subset=["section"]) if locs_matching_end.empty: return 
locs_matching_end - - merged = locs_matching_start.merge(locs_matching_end, how='outer', on=['section']) - filtered = merged.loc[merged['idx_x'] Date: Thu, 5 Sep 2024 15:40:34 -0700 Subject: [PATCH 2/9] Modified Script --- docker/load_mongodump.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index 9039b59..e3e2738 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -1,8 +1,8 @@ #!/bin/bash # Check if the correct number of arguments is provided -if [ "$#" -ne 3 ]; then - echo "Usage: $0 " +if [ "$#" -ne 4 ]; then + echo "Usage: $0 " exit 1 fi @@ -10,19 +10,20 @@ fi MONGODUMP_FILE=$1 DATABASE_NAME=$2 DOCKER_CONTAINER_NAME=$3 +COLLECTION_NAME=$4 # Extract the base name of the dump file for use in the restore command -FILE_NAME=$(basename $MONGODUMP_FILE) +FILE_NAME=$(basename "$MONGODUMP_FILE") # Drop the existing database to ensure it’s clean before restoring the new dump echo "Dropping the existing database $DATABASE_NAME" -docker exec $DOCKER_CONTAINER_NAME mongo $DATABASE_NAME --eval "db.dropDatabase()" +docker exec "$DOCKER_CONTAINER_NAME" mongo "$DATABASE_NAME" --eval "db.dropDatabase()" # Copy the MongoDB dump file from the local machine to the Docker container echo "Copying file to Docker container $DOCKER_CONTAINER_NAME" -docker cp $MONGODUMP_FILE $DOCKER_CONTAINER_NAME:/tmp +docker cp "$MONGODUMP_FILE" "$DOCKER_CONTAINER_NAME:/tmp" # Restore the dump into the specified database echo "Restoring the dump from $FILE_NAME to database $DATABASE_NAME" -docker exec -e MONGODUMP_FILE=$FILE_NAME $DOCKER_CONTAINER_NAME bash -c \ - 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore -d '"$DATABASE_NAME"' dump/openpath_prod_ca_ebike' +docker exec -e MONGODUMP_FILE="$FILE_NAME" "$DOCKER_CONTAINER_NAME" bash -c \ + 'cd /tmp && tar xvf "$MONGODUMP_FILE" && mongorestore -d '"$DATABASE_NAME"' dump/'"$COLLECTION_NAME"'' From 0dba1f41a50a8e52b69b1742688869dd7419ff1d Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Fri, 6 Sep 2024 08:56:39 -0700 Subject: [PATCH 3/9] reverted format --- docker/load_mongodump.sh | 32 +++++- utils/db_utils.py | 236 ++++++++++++++++----------------------- 2 files changed, 124 insertions(+), 144 deletions(-) diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index e3e2738..3674a72 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -7,10 +7,10 @@ if [ "$#" -ne 4 ]; then fi # Assign arguments to variables -MONGODUMP_FILE=$1 -DATABASE_NAME=$2 -DOCKER_CONTAINER_NAME=$3 -COLLECTION_NAME=$4 +MONGODUMP_FILE="$1" +DATABASE_NAME="$2" +DOCKER_CONTAINER_NAME="$3" +COLLECTION_NAME="$4" # Extract the base name of the dump file for use in the restore command FILE_NAME=$(basename "$MONGODUMP_FILE") @@ -19,11 +19,31 @@ FILE_NAME=$(basename "$MONGODUMP_FILE") echo "Dropping the existing database $DATABASE_NAME" docker exec "$DOCKER_CONTAINER_NAME" mongo "$DATABASE_NAME" --eval "db.dropDatabase()" +# Check if the drop command was successful +if [ $? -ne 0 ]; then + echo "Failed to drop the database $DATABASE_NAME" + exit 1 +fi + # Copy the MongoDB dump file from the local machine to the Docker container echo "Copying file to Docker container $DOCKER_CONTAINER_NAME" docker cp "$MONGODUMP_FILE" "$DOCKER_CONTAINER_NAME:/tmp" +# Check if the copy command was successful +if [ $? 
-ne 0 ]; then + echo "Failed to copy the dump file to the Docker container" + exit 1 +fi + # Restore the dump into the specified database echo "Restoring the dump from $FILE_NAME to database $DATABASE_NAME" -docker exec -e MONGODUMP_FILE="$FILE_NAME" "$DOCKER_CONTAINER_NAME" bash -c \ - 'cd /tmp && tar xvf "$MONGODUMP_FILE" && mongorestore -d '"$DATABASE_NAME"' dump/'"$COLLECTION_NAME"'' +docker exec -e MONGODUMP_FILE="/tmp/$FILE_NAME" "$DOCKER_CONTAINER_NAME" bash -c \ + 'tar xvf "$MONGODUMP_FILE" -C /tmp && mongorestore --db '"$DATABASE_NAME"' /tmp/dump/'"$COLLECTION_NAME"' --drop' + +# Check if the restore command was successful +if [ $? -ne 0 ]; then + echo "Failed to restore the dump to the database $DATABASE_NAME" + exit 1 +fi + +echo "Restore completed successfully" diff --git a/utils/db_utils.py b/utils/db_utils.py index 5aad065..f4fefdd 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -17,19 +17,16 @@ from utils import permissions as perm_utils from utils.datetime_utils import iso_range_to_ts_range - def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = []): """ Returns a dictionary of df records, given a dataframe, a column to filter on, and a list of values that rows in that column will be excluded if they match """ - if df.empty: - return [] - if col_to_filter and vals_to_exclude: # will only filter if both are not None or [] + if df.empty: return [] + if col_to_filter and vals_to_exclude: # will only filter if both are not None or [] df = df[~df[col_to_filter].isin(vals_to_exclude)] return df.to_dict("records") - def query_uuids(start_date: str, end_date: str, tz: str): # As of now, time filtering does not apply to UUIDs; we just query all of them. # Vestigial code commented out and left below for future reference @@ -61,20 +58,18 @@ def query_uuids(start_date: str, end_date: str, tz: str): entries = edb.get_uuid_db().find() df = pd.json_normalize(list(entries)) if not df.empty: - df["update_ts"] = pd.to_datetime(df["update_ts"]) - df["user_id"] = df["uuid"].apply(str) - df["user_token"] = df["user_email"] + df['update_ts'] = pd.to_datetime(df['update_ts']) + df['user_id'] = df['uuid'].apply(str) + df['user_token'] = df['user_email'] df.drop(columns=["uuid", "_id"], inplace=True) return df - def query_confirmed_trips(start_date: str, end_date: str, tz: str): (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic - df = ts.get_data_df( - "analysis/confirmed_trip", + df = ts.get_data_df("analysis/confirmed_trip", time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts), ) user_input_cols = [] @@ -87,19 +82,19 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # for backwards compatibility. We do this for all columns since columns which don't exist are ignored by the rename command. 
rename_cols = constants.VALID_TRIP_COLS # the mapping is `{distance: data.distance, duration: data.duration} etc - rename_mapping = dict( - zip([c.replace("data.", "") for c in rename_cols], rename_cols) - ) + rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols)) logging.debug("Rename mapping is %s" % rename_mapping) df.rename(columns=rename_mapping, inplace=True) logging.debug("After renaming columns, they are %s" % df.columns) # Now copy over the coordinates - df["data.start_loc.coordinates"] = df["start_loc"].apply( - lambda g: g["coordinates"] - ) - df["data.end_loc.coordinates"] = df["end_loc"].apply(lambda g: g["coordinates"]) + df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"]) + df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"]) + # Add primary modes from the sensed, inferred and ble summaries. Note that we do this + # **before** filtering the `all_trip_columns` because the + # *_section_summary columns are not currently valid + # Check if 'md' is not a dictionary or does not contain the key 'distance' # or if 'md["distance"]' is not a dictionary. # If any of these conditions are true, return "INVALID". @@ -119,32 +114,23 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): ) ) - df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply( - get_max_mode_from_summary - ) - df["data.primary_predicted_mode"] = df.inferred_section_summary.apply( - get_max_mode_from_summary - ) - if "ble_sensed_summary" in df.columns: - df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply( - get_max_mode_from_summary - ) + df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary) + df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary) + if 'ble_sensed_summary' in df.columns: + df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary) else: logging.debug("No BLE support found, not fleet version, ignoring...") # Expand the user inputs user_input_df = pd.json_normalize(df.user_input) - df = pd.concat([df, user_input_df], axis="columns") + df = pd.concat([df, user_input_df], axis='columns') logging.debug(f"Before filtering {user_input_df.columns=}") - user_input_cols = [ - c - for c in user_input_df.columns - if "metadata" not in c - and "xmlns" not in c - and "local_dt" not in c - and "xmlResponse" not in c - and "_id" not in c - ] + user_input_cols = [c for c in user_input_df.columns + if "metadata" not in c and + "xmlns" not in c and + "local_dt" not in c and + 'xmlResponse' not in c and + "_id" not in c] logging.debug(f"After filtering {user_input_cols=}") combined_col_list = list(perm_utils.get_all_trip_columns()) + user_input_cols @@ -157,8 +143,8 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): if col in df.columns: df[col] = df[col].apply(str) for named_col in perm_utils.get_all_named_trip_columns(): - if named_col["path"] in df.columns: - df[named_col["label"]] = df[named_col["path"]] + if named_col['path'] in df.columns: + df[named_col['label']] = df[named_col['path']] # df = df.drop(columns=[named_col['path']]) # TODO: We should really display both the humanized value and the raw value # humanized value for people to see the entries in real time @@ -167,28 +153,24 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530105040 # 
https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530439811 # so just replacing the distance and duration with the humanized values for now - df["data.distance_meters"] = df["data.distance"] - use_imperial = perm_utils.config.get( - "display_config", {"use_imperial": False} - ).get("use_imperial", False) + df['data.distance_meters'] = df['data.distance'] + use_imperial = perm_utils.config.get("display_config", + {"use_imperial": False}).get("use_imperial", False) # convert to km to humanize - df["data.distance_km"] = df["data.distance"] / 1000 + df['data.distance_km'] = df['data.distance'] / 1000 # convert km further to miles because this is the US, Liberia or Myanmar # https://en.wikipedia.org/wiki/Mile - df["data.duration_seconds"] = df["data.duration"] + df['data.duration_seconds'] = df['data.duration'] if use_imperial: - df["data.distance_miles"] = df["data.distance_km"] * 0.6213712 + df['data.distance_miles'] = df['data.distance_km'] * 0.6213712 - df["data.duration"] = df["data.duration"].apply( - lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True) - ) + df['data.duration'] = df['data.duration'].apply(lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True)) # logging.debug("After filtering, df columns are %s" % df.columns) # logging.debug("After filtering, the actual data is %s" % df.head()) # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) return (df, user_input_cols) - def query_demographics(): # Returns dictionary of df where key represent differnt survey id and values are df for each survey logging.debug("Querying the demographics for (no date range)") @@ -199,7 +181,7 @@ def query_demographics(): available_key = {} for entry in data: - survey_key = list(entry["data"]["jsonDocResponse"].keys())[0] + survey_key = list(entry['data']['jsonDocResponse'].keys())[0] if survey_key not in available_key: available_key[survey_key] = [] available_key[survey_key].append(entry) @@ -213,28 +195,20 @@ def query_demographics(): if not df.empty: for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: - df[col] = df[col].apply(str) + df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns=columns_to_drop, inplace=True) - modified_columns = perm_utils.get_demographic_columns(df.columns) - df.columns = modified_columns - df.columns = [ - ( - col.rsplit(".", 1)[-1] - if col.startswith("data.jsonDocResponse.") - else col - ) - for col in df.columns - ] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: if col in df.columns: - df.drop(columns=[col], inplace=True) - + df.drop(columns= [col], inplace=True) + return dataframes - def query_trajectories(start_date: str, end_date: str, tz: str): - + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( @@ -244,131 +218,117 @@ def query_trajectories(start_date: str, end_date: str, tz: str): df = pd.json_normalize(list(entries)) if not df.empty: for col in df.columns: - if df[col].dtype == "object": + if df[col].dtype == 'object': df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - 
df.drop(columns=columns_to_drop, inplace=True) + df.drop(columns= columns_to_drop, inplace=True) for col in constants.EXCLUDED_TRAJECTORIES_COLS: if col in df.columns: - df.drop(columns=[col], inplace=True) - df["data.mode_str"] = df["data.mode"].apply( - lambda x: ( - ecwm.MotionTypes(x).name - if x in set(enum.value for enum in ecwm.MotionTypes) - else "UNKNOWN" - ) - ) + df.drop(columns= [col], inplace=True) + df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') return df def add_user_stats(user_data): for user in user_data: - user_uuid = UUID(user["user_id"]) + user_uuid = UUID(user['user_id']) total_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( key_list=["analysis/confirmed_trip"], - extra_query_list=[{"user_id": user_uuid}], + extra_query_list=[{'user_id': user_uuid}] ) - user["total_trips"] = total_trips + user['total_trips'] = total_trips labeled_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( key_list=["analysis/confirmed_trip"], - extra_query_list=[{"user_id": user_uuid}, {"data.user_input": {"$ne": {}}}], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] ) - user["labeled_trips"] = labeled_trips + user['labeled_trips'] = labeled_trips + + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + user['platform'] = profile_data.get('curr_platform') + user['manufacturer'] = profile_data.get('manufacturer') + user['app_version'] = profile_data.get('client_app_version') + user['os_version'] = profile_data.get('client_os_version') + user['phone_lang'] = profile_data.get('phone_lang') + + - profile_data = edb.get_profile_db().find_one({"user_id": user_uuid}) - user["platform"] = profile_data.get("curr_platform") - user["manufacturer"] = profile_data.get("manufacturer") - user["app_version"] = profile_data.get("client_app_version") - user["os_version"] = profile_data.get("client_os_version") - user["phone_lang"] = profile_data.get("phone_lang") if total_trips > 0: - time_format = "YYYY-MM-DD HH:mm:ss" + time_format = 'YYYY-MM-DD HH:mm:ss' ts = esta.TimeSeries.get_time_series(user_uuid) start_ts = ts.get_first_value_for_field( - key="analysis/confirmed_trip", - field="data.end_ts", - sort_order=pymongo.ASCENDING, + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.ASCENDING ) if start_ts != -1: - user["first_trip"] = arrow.get(start_ts).format(time_format) + user['first_trip'] = arrow.get(start_ts).format(time_format) end_ts = ts.get_first_value_for_field( - key="analysis/confirmed_trip", - field="data.end_ts", - sort_order=pymongo.DESCENDING, + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.DESCENDING ) if end_ts != -1: - user["last_trip"] = arrow.get(end_ts).format(time_format) + user['last_trip'] = arrow.get(end_ts).format(time_format) last_call = ts.get_first_value_for_field( - key="stats/server_api_time", - field="data.ts", - sort_order=pymongo.DESCENDING, + key='stats/server_api_time', + field='data.ts', + sort_order=pymongo.DESCENDING ) if last_call != -1: - user["last_call"] = arrow.get(last_call).format(time_format) + user['last_call'] = arrow.get(last_call).format(time_format) return user_data - -def query_segments_crossing_endpoints( - poly_region_start, - poly_region_end, - start_date: str, - end_date: str, - tz: str, - excluded_uuids: list[str], -): +def query_segments_crossing_endpoints(poly_region_start, poly_region_end, start_date: str, 
end_date: str, tz: str, excluded_uuids: list[str]): (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) tq = estt.TimeQuery("data.ts", start_ts, end_ts) - not_excluded_uuid_query = { - "user_id": {"$nin": [UUID(uuid) for uuid in excluded_uuids]} - } + not_excluded_uuid_query = {'user_id': {'$nin': [UUID(uuid) for uuid in excluded_uuids]}} agg_ts = estag.AggregateTimeSeries().get_aggregate_time_series() locs_matching_start = agg_ts.get_data_df( - "analysis/recreated_location", - geo_query=estg.GeoQuery(["data.loc"], poly_region_start), - time_query=tq, - extra_query_list=[not_excluded_uuid_query], - ) - locs_matching_start = locs_matching_start.drop_duplicates(subset=["section"]) + "analysis/recreated_location", + geo_query = estg.GeoQuery(['data.loc'], poly_region_start), + time_query = tq, + extra_query_list=[not_excluded_uuid_query] + ) + locs_matching_start = locs_matching_start.drop_duplicates(subset=['section']) if locs_matching_start.empty: return locs_matching_start - + locs_matching_end = agg_ts.get_data_df( - "analysis/recreated_location", - geo_query=estg.GeoQuery(["data.loc"], poly_region_end), - time_query=tq, - extra_query_list=[not_excluded_uuid_query], - ) - locs_matching_end = locs_matching_end.drop_duplicates(subset=["section"]) + "analysis/recreated_location", + geo_query = estg.GeoQuery(['data.loc'], poly_region_end), + time_query = tq, + extra_query_list=[not_excluded_uuid_query] + ) + locs_matching_end = locs_matching_end.drop_duplicates(subset=['section']) if locs_matching_end.empty: return locs_matching_end - - merged = locs_matching_start.merge(locs_matching_end, how="outer", on=["section"]) - filtered = merged.loc[merged["idx_x"] < merged["idx_y"]].copy() - filtered["duration"] = filtered["ts_y"] - filtered["ts_x"] - filtered["mode"] = filtered["mode_x"] - filtered["start_fmt_time"] = filtered["fmt_time_x"] - filtered["end_fmt_time"] = filtered["fmt_time_y"] - filtered["user_id"] = filtered["user_id_y"] - + + merged = locs_matching_start.merge(locs_matching_end, how='outer', on=['section']) + filtered = merged.loc[merged['idx_x'] Date: Fri, 6 Sep 2024 12:40:07 -0700 Subject: [PATCH 4/9] modified load_mongodump to be flexible --- docker-compose-dev.yml | 5 ++-- docker-compose-dev.yml.bak | 50 ++++++++++++++++++++++++++++++++++++++ docker/load_mongodump.sh | 30 +++++++++++++++++++++-- 3 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 docker-compose-dev.yml.bak diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index df44e28..6aa82e2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -1,4 +1,3 @@ -# docker-compose.yml version: "3" services: dashboard: @@ -14,7 +13,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - DB_HOST: db + DB_HOST: mongodb://db:27017/testing WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" @@ -30,7 +29,7 @@ services: - ./app_sidebar_collapsible.py:/usr/src/app/app_sidebar_collapsible.py deploy: restart_policy: - condition: on-failure + condition: on-failure db: image: mongo:4.4.0 deploy: diff --git a/docker-compose-dev.yml.bak b/docker-compose-dev.yml.bak new file mode 100644 index 0000000..6f93288 --- /dev/null +++ b/docker-compose-dev.yml.bak @@ -0,0 +1,50 @@ +version: "3" +services: + dashboard: + build: + context: . 
+ dockerfile: docker/Dockerfile + args: + SERVER_IMAGE_TAG: ${SERVER_IMAGE_TAG} + image: e-mission/opdash:0.0.1 + ports: + - "8050:8050" + environment: + DASH_DEBUG_MODE: "True" + DASH_SILENCE_ROUTES_LOGGING: "False" + DASH_SERVER_PORT: 8050 + DB_HOST: mongodb://db:27017/test + WEB_SERVER_HOST: 0.0.0.0 + SERVER_BRANCH: master + CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" + STUDY_CONFIG: "stage-program" + AUTH_TYPE: "basic" # the other option is cognito + REACT_VERSION: "18.2.0" + networks: + - emission + volumes: + - ./pages:/usr/src/app/pages + - ./utils:/usr/src/app/utils + - ./app.py:/usr/src/app/app.py + - ./app_sidebar_collapsible.py:/usr/src/app/app_sidebar_collapsible.py + deploy: + restart_policy: + condition: on-failure + db: + image: mongo:4.4.0 + deploy: + replicas: 1 + restart_policy: + condition: on-failure + volumes: + - mongo-data:/data/db + networks: + - emission + ports: + - "27017:27017" + +networks: + emission: + +volumes: + mongo-data: diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index 3674a72..3ffb23b 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -15,6 +15,32 @@ COLLECTION_NAME="$4" # Extract the base name of the dump file for use in the restore command FILE_NAME=$(basename "$MONGODUMP_FILE") +# Paths +SCRIPT_DIR="$(dirname "$0")" +COMPOSE_FILE="$SCRIPT_DIR/../docker-compose-dev.yml" # Adjust the path to the docker-compose.yml file + +# Extract the new database host from the docker-compose file +NEW_DB_HOST=$(grep 'DB_HOST:' "$COMPOSE_FILE" | sed 's|DB_HOST: ||') + +# Modify the docker-compose.yml to update the DB_HOST (if needed) +echo "Updating $COMPOSE_FILE to set DB_HOST to mongodb://db:27017/$DATABASE_NAME" +sed -i.bak "s|DB_HOST: .*|DB_HOST: mongodb://db:27017/$DATABASE_NAME|" "$COMPOSE_FILE" + +# Restart Docker Compose to apply changes +echo "Restarting Docker Compose services" +docker compose -f "$COMPOSE_FILE" down +docker compose -f "$COMPOSE_FILE" up -d + +# Wait for Docker Compose to be ready +echo "Waiting for Docker Compose services to be ready" +sleep 10 + +# Check if the Docker container is running +if ! docker ps | grep -q "$DOCKER_CONTAINER_NAME"; then + echo "Docker container $DOCKER_CONTAINER_NAME is not running" + exit 1 +fi + # Drop the existing database to ensure it’s clean before restoring the new dump echo "Dropping the existing database $DATABASE_NAME" docker exec "$DOCKER_CONTAINER_NAME" mongo "$DATABASE_NAME" --eval "db.dropDatabase()" @@ -37,8 +63,8 @@ fi # Restore the dump into the specified database echo "Restoring the dump from $FILE_NAME to database $DATABASE_NAME" -docker exec -e MONGODUMP_FILE="/tmp/$FILE_NAME" "$DOCKER_CONTAINER_NAME" bash -c \ - 'tar xvf "$MONGODUMP_FILE" -C /tmp && mongorestore --db '"$DATABASE_NAME"' /tmp/dump/'"$COLLECTION_NAME"' --drop' +docker exec "$DOCKER_CONTAINER_NAME" bash -c \ + "tar xvf /tmp/$FILE_NAME -C /tmp && mongorestore --db $DATABASE_NAME /tmp/dump/$COLLECTION_NAME --drop" # Check if the restore command was successful if [ $? 
-ne 0 ]; then From c4149c0eda814d1fd80497cf9261ca9d69df70eb Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Fri, 6 Sep 2024 13:31:40 -0700 Subject: [PATCH 5/9] Simplified Script --- docker-compose-dev.yml | 2 +- docker/load_mongodump.sh | 96 ++++++++++++++++++---------------------- 2 files changed, 45 insertions(+), 53 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6aa82e2..d380f68 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -13,7 +13,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db:27017/testing + DB_HOST: mongodb://db:27017/openpath_prod_ca_ebike WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index 3ffb23b..81929a8 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -1,75 +1,67 @@ #!/bin/bash +# Directory of the script +SCRIPT_DIR="$(dirname "$0")" + +# Path to the configuration file (one level up) +CONFIG_FILE="$SCRIPT_DIR/../docker-compose-dev.yml" + # Check if the correct number of arguments is provided -if [ "$#" -ne 4 ]; then - echo "Usage: $0 " +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + echo " : The path to the MongoDB dump file to be restored." exit 1 fi -# Assign arguments to variables -MONGODUMP_FILE="$1" -DATABASE_NAME="$2" -DOCKER_CONTAINER_NAME="$3" -COLLECTION_NAME="$4" - -# Extract the base name of the dump file for use in the restore command -FILE_NAME=$(basename "$MONGODUMP_FILE") +MONGODUMP_FILE=$1 -# Paths -SCRIPT_DIR="$(dirname "$0")" -COMPOSE_FILE="$SCRIPT_DIR/../docker-compose-dev.yml" # Adjust the path to the docker-compose.yml file +# Print debug information +echo "Script Directory: $SCRIPT_DIR" +echo "Configuration File Path: $CONFIG_FILE" +echo "MongoDump File Path: $MONGODUMP_FILE" -# Extract the new database host from the docker-compose file -NEW_DB_HOST=$(grep 'DB_HOST:' "$COMPOSE_FILE" | sed 's|DB_HOST: ||') +# Check if the provided file exists +if [ ! -f "$MONGODUMP_FILE" ]; then + echo "Error: File '$MONGODUMP_FILE' does not exist." + exit 1 +fi -# Modify the docker-compose.yml to update the DB_HOST (if needed) -echo "Updating $COMPOSE_FILE to set DB_HOST to mongodb://db:27017/$DATABASE_NAME" -sed -i.bak "s|DB_HOST: .*|DB_HOST: mongodb://db:27017/$DATABASE_NAME|" "$COMPOSE_FILE" +# Check if the configuration file exists +if [ ! -f "$CONFIG_FILE" ]; then + echo "Error: Configuration file '$CONFIG_FILE' does not exist." + exit 1 +fi -# Restart Docker Compose to apply changes -echo "Restarting Docker Compose services" -docker compose -f "$COMPOSE_FILE" down -docker compose -f "$COMPOSE_FILE" up -d +# Print details about the configuration file +echo "Configuration file details:" +ls -l "$CONFIG_FILE" -# Wait for Docker Compose to be ready -echo "Waiting for Docker Compose services to be ready" -sleep 10 -# Check if the Docker container is running -if ! docker ps | grep -q "$DOCKER_CONTAINER_NAME"; then - echo "Docker container $DOCKER_CONTAINER_NAME is not running" +# Extract DB_HOST from the YAML file +DB_HOST=$(grep -i "DB_HOST:" "$CONFIG_FILE" | sed 's/^[^:]*: *//') +if [ -z "$DB_HOST" ]; then + echo "Error: DB_HOST not found in configuration file." 
exit 1 fi -# Drop the existing database to ensure it’s clean before restoring the new dump -echo "Dropping the existing database $DATABASE_NAME" -docker exec "$DOCKER_CONTAINER_NAME" mongo "$DATABASE_NAME" --eval "db.dropDatabase()" +# Extract the database name from DB_HOST +DB_NAME=$(echo "$DB_HOST" | sed -e 's/^mongodb:\/\/[^:]*:[0-9]*\///') -# Check if the drop command was successful -if [ $? -ne 0 ]; then - echo "Failed to drop the database $DATABASE_NAME" +# Check if the database name was extracted correctly +if [ -z "$DB_NAME" ]; then + echo "Error: Failed to extract database name from DB_HOST." exit 1 fi -# Copy the MongoDB dump file from the local machine to the Docker container -echo "Copying file to Docker container $DOCKER_CONTAINER_NAME" -docker cp "$MONGODUMP_FILE" "$DOCKER_CONTAINER_NAME:/tmp" +echo "Copying file to Docker container" +docker cp "$MONGODUMP_FILE" op-admin-dashboard-db-1:/tmp -# Check if the copy command was successful -if [ $? -ne 0 ]; then - echo "Failed to copy the dump file to the Docker container" - exit 1 -fi +FILE_NAME=$(basename "$MONGODUMP_FILE") -# Restore the dump into the specified database -echo "Restoring the dump from $FILE_NAME to database $DATABASE_NAME" -docker exec "$DOCKER_CONTAINER_NAME" bash -c \ - "tar xvf /tmp/$FILE_NAME -C /tmp && mongorestore --db $DATABASE_NAME /tmp/dump/$COLLECTION_NAME --drop" +echo "Clearing existing database" +docker exec op-admin-dashboard-db-1 bash -c 'mongo --eval "db.getMongo().getDBNames().forEach(function(d) { if (d !== \"admin\" && d !== \"local\") db.getSiblingDB(d).dropDatabase(); })"' -# Check if the restore command was successful -if [ $? -ne 0 ]; then - echo "Failed to restore the dump to the database $DATABASE_NAME" - exit 1 -fi +echo "Restoring the dump from $FILE_NAME to database $DB_NAME" +docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c "cd /tmp && tar xvf $FILE_NAME && mongorestore -d $DB_NAME dump/openpath_prod_ca_ebike" -echo "Restore completed successfully" +echo "Database restore complete." From 3db763c1aecd294d5a07f2361bd660590264ce15 Mon Sep 17 00:00:00 2001 From: Robin Date: Fri, 6 Sep 2024 13:33:16 -0700 Subject: [PATCH 6/9] Delete docker-compose-dev.yml.bak --- docker-compose-dev.yml.bak | 50 -------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 docker-compose-dev.yml.bak diff --git a/docker-compose-dev.yml.bak b/docker-compose-dev.yml.bak deleted file mode 100644 index 6f93288..0000000 --- a/docker-compose-dev.yml.bak +++ /dev/null @@ -1,50 +0,0 @@ -version: "3" -services: - dashboard: - build: - context: . 
- dockerfile: docker/Dockerfile - args: - SERVER_IMAGE_TAG: ${SERVER_IMAGE_TAG} - image: e-mission/opdash:0.0.1 - ports: - - "8050:8050" - environment: - DASH_DEBUG_MODE: "True" - DASH_SILENCE_ROUTES_LOGGING: "False" - DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db:27017/test - WEB_SERVER_HOST: 0.0.0.0 - SERVER_BRANCH: master - CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" - STUDY_CONFIG: "stage-program" - AUTH_TYPE: "basic" # the other option is cognito - REACT_VERSION: "18.2.0" - networks: - - emission - volumes: - - ./pages:/usr/src/app/pages - - ./utils:/usr/src/app/utils - - ./app.py:/usr/src/app/app.py - - ./app_sidebar_collapsible.py:/usr/src/app/app_sidebar_collapsible.py - deploy: - restart_policy: - condition: on-failure - db: - image: mongo:4.4.0 - deploy: - replicas: 1 - restart_policy: - condition: on-failure - volumes: - - mongo-data:/data/db - networks: - - emission - ports: - - "27017:27017" - -networks: - emission: - -volumes: - mongo-data: From d3f365062012c7e63d737a2db9fbc0450c7208f6 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sat, 7 Sep 2024 12:06:24 -0700 Subject: [PATCH 7/9] Get DB Name from Dump --- docker-compose-dev.yml | 2 +- docker-compose-dev.yml.bak | 2 +- docker/load_mongodump.sh | 29 +++++++++++++++++------------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d380f68..23784d5 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -13,7 +13,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db:27017/openpath_prod_ca_ebike + DB_HOST: mongodb://db/DB_NAME WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" diff --git a/docker-compose-dev.yml.bak b/docker-compose-dev.yml.bak index 6f93288..cfadbf4 100644 --- a/docker-compose-dev.yml.bak +++ b/docker-compose-dev.yml.bak @@ -13,7 +13,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db:27017/test + DB_HOST: mongodb://db/openpath_prod_ca_ebike WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index 81929a8..fd0a98c 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -36,22 +36,24 @@ fi echo "Configuration file details:" ls -l "$CONFIG_FILE" +# Extract database name from the mongodump file +TEMP_DIR=$(mktemp -d) +tar -xf "$MONGODUMP_FILE" -C "$TEMP_DIR" +DB_NAME=$(find "$TEMP_DIR/dump" -mindepth 1 -maxdepth 1 -type d -exec basename {} \;) -# Extract DB_HOST from the YAML file -DB_HOST=$(grep -i "DB_HOST:" "$CONFIG_FILE" | sed 's/^[^:]*: *//') -if [ -z "$DB_HOST" ]; then - echo "Error: DB_HOST not found in configuration file." +if [ -z "$DB_NAME" ]; then + echo "Error: Failed to extract database name from mongodump." exit 1 fi -# Extract the database name from DB_HOST -DB_NAME=$(echo "$DB_HOST" | sed -e 's/^mongodb:\/\/[^:]*:[0-9]*\///') +echo "Database Name: $DB_NAME" -# Check if the database name was extracted correctly -if [ -z "$DB_NAME" ]; then - echo "Error: Failed to extract database name from DB_HOST." 
- exit 1 -fi +# Update the docker-compose configuration file with the actual DB_HOST +DB_HOST="mongodb://db/$DB_NAME" +sed -i.bak "s|DB_HOST:.*|DB_HOST: $DB_HOST|" "$CONFIG_FILE" + +echo "Updated docker-compose file:" +cat "$CONFIG_FILE" echo "Copying file to Docker container" docker cp "$MONGODUMP_FILE" op-admin-dashboard-db-1:/tmp @@ -62,6 +64,9 @@ echo "Clearing existing database" docker exec op-admin-dashboard-db-1 bash -c 'mongo --eval "db.getMongo().getDBNames().forEach(function(d) { if (d !== \"admin\" && d !== \"local\") db.getSiblingDB(d).dropDatabase(); })"' echo "Restoring the dump from $FILE_NAME to database $DB_NAME" -docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c "cd /tmp && tar xvf $FILE_NAME && mongorestore -d $DB_NAME dump/openpath_prod_ca_ebike" +docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c "cd /tmp && tar xvf $FILE_NAME && mongorestore -d $DB_NAME dump/$DB_NAME" echo "Database restore complete." + +# Clean up temporary directory +rm -rf "$TEMP_DIR" From 86423e4303b4746f59fa12a48d48b6aad73e13e4 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sat, 7 Sep 2024 16:24:37 -0700 Subject: [PATCH 8/9] Reverted --- docker-compose-dev.yml | 5 +-- docker/load_mongodump.sh | 72 +++------------------------------------- 2 files changed, 8 insertions(+), 69 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 23784d5..df44e28 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -1,3 +1,4 @@ +# docker-compose.yml version: "3" services: dashboard: @@ -13,7 +14,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db/DB_NAME + DB_HOST: db WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" @@ -29,7 +30,7 @@ services: - ./app_sidebar_collapsible.py:/usr/src/app/app_sidebar_collapsible.py deploy: restart_policy: - condition: on-failure + condition: on-failure db: image: mongo:4.4.0 deploy: diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh index fd0a98c..bd6fcea 100755 --- a/docker/load_mongodump.sh +++ b/docker/load_mongodump.sh @@ -1,72 +1,10 @@ -#!/bin/bash - -# Directory of the script -SCRIPT_DIR="$(dirname "$0")" - -# Path to the configuration file (one level up) -CONFIG_FILE="$SCRIPT_DIR/../docker-compose-dev.yml" - -# Check if the correct number of arguments is provided -if [ "$#" -ne 1 ]; then - echo "Usage: $0 " - echo " : The path to the MongoDB dump file to be restored." - exit 1 -fi - MONGODUMP_FILE=$1 -# Print debug information -echo "Script Directory: $SCRIPT_DIR" -echo "Configuration File Path: $CONFIG_FILE" -echo "MongoDump File Path: $MONGODUMP_FILE" - -# Check if the provided file exists -if [ ! -f "$MONGODUMP_FILE" ]; then - echo "Error: File '$MONGODUMP_FILE' does not exist." - exit 1 -fi - -# Check if the configuration file exists -if [ ! -f "$CONFIG_FILE" ]; then - echo "Error: Configuration file '$CONFIG_FILE' does not exist." - exit 1 -fi - -# Print details about the configuration file -echo "Configuration file details:" -ls -l "$CONFIG_FILE" - -# Extract database name from the mongodump file -TEMP_DIR=$(mktemp -d) -tar -xf "$MONGODUMP_FILE" -C "$TEMP_DIR" -DB_NAME=$(find "$TEMP_DIR/dump" -mindepth 1 -maxdepth 1 -type d -exec basename {} \;) - -if [ -z "$DB_NAME" ]; then - echo "Error: Failed to extract database name from mongodump." 
- exit 1 -fi - -echo "Database Name: $DB_NAME" - -# Update the docker-compose configuration file with the actual DB_HOST -DB_HOST="mongodb://db/$DB_NAME" -sed -i.bak "s|DB_HOST:.*|DB_HOST: $DB_HOST|" "$CONFIG_FILE" - -echo "Updated docker-compose file:" -cat "$CONFIG_FILE" - -echo "Copying file to Docker container" -docker cp "$MONGODUMP_FILE" op-admin-dashboard-db-1:/tmp - -FILE_NAME=$(basename "$MONGODUMP_FILE") - -echo "Clearing existing database" -docker exec op-admin-dashboard-db-1 bash -c 'mongo --eval "db.getMongo().getDBNames().forEach(function(d) { if (d !== \"admin\" && d !== \"local\") db.getSiblingDB(d).dropDatabase(); })"' +echo "Copying file to docker container" +docker cp $MONGODUMP_FILE op-admin-dashboard-db-1:/tmp -echo "Restoring the dump from $FILE_NAME to database $DB_NAME" -docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c "cd /tmp && tar xvf $FILE_NAME && mongorestore -d $DB_NAME dump/$DB_NAME" +FILE_NAME=`basename $MONGODUMP_FILE` -echo "Database restore complete." +echo "Restoring the dump from $FILE_NAME" +docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore' -# Clean up temporary directory -rm -rf "$TEMP_DIR" From 8aad0b12a8a318b7a6c214d60f1419ba58d63c96 Mon Sep 17 00:00:00 2001 From: Robin Date: Sat, 7 Sep 2024 16:25:08 -0700 Subject: [PATCH 9/9] Delete docker-compose-dev.yml.bak --- docker-compose-dev.yml.bak | 50 -------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 docker-compose-dev.yml.bak diff --git a/docker-compose-dev.yml.bak b/docker-compose-dev.yml.bak deleted file mode 100644 index cfadbf4..0000000 --- a/docker-compose-dev.yml.bak +++ /dev/null @@ -1,50 +0,0 @@ -version: "3" -services: - dashboard: - build: - context: . - dockerfile: docker/Dockerfile - args: - SERVER_IMAGE_TAG: ${SERVER_IMAGE_TAG} - image: e-mission/opdash:0.0.1 - ports: - - "8050:8050" - environment: - DASH_DEBUG_MODE: "True" - DASH_SILENCE_ROUTES_LOGGING: "False" - DASH_SERVER_PORT: 8050 - DB_HOST: mongodb://db/openpath_prod_ca_ebike - WEB_SERVER_HOST: 0.0.0.0 - SERVER_BRANCH: master - CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" - STUDY_CONFIG: "stage-program" - AUTH_TYPE: "basic" # the other option is cognito - REACT_VERSION: "18.2.0" - networks: - - emission - volumes: - - ./pages:/usr/src/app/pages - - ./utils:/usr/src/app/utils - - ./app.py:/usr/src/app/app.py - - ./app_sidebar_collapsible.py:/usr/src/app/app_sidebar_collapsible.py - deploy: - restart_policy: - condition: on-failure - db: - image: mongo:4.4.0 - deploy: - replicas: 1 - restart_policy: - condition: on-failure - volumes: - - mongo-data:/data/db - networks: - - emission - ports: - - "27017:27017" - -networks: - emission: - -volumes: - mongo-data:
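
Net effect of this series, for context: docker/load_mongodump.sh and docker-compose-dev.yml end up back at their original contents (PATCH 8/9 and 9/9 revert them and delete the .bak), so the substantive change that remains is the defensive get_max_mode_from_summary lambda added to utils/db_utils.py in PATCH 1/9 and kept through the formatting revert in PATCH 3/9: it returns "INVALID" instead of raising when a trip's section summary is missing or malformed. A minimal standalone sketch of that behavior follows; the sample summary dicts are illustrative, not taken from real trip data.

# Defensive primary-mode lookup, mirroring the lambda added to utils/db_utils.py in this series.
# Anything that is not a dict containing a non-empty "distance" dict maps to "INVALID".
get_max_mode_from_summary = lambda md: (
    "INVALID"
    if not isinstance(md, dict)
    or "distance" not in md
    or not isinstance(md["distance"], dict)
    else (
        max(md["distance"], key=md["distance"].get)  # mode with the largest distance
        if len(md["distance"]) > 0
        else "INVALID"
    )
)

# Illustrative inputs only:
print(get_max_mode_from_summary({"distance": {"WALKING": 120.5, "BICYCLING": 900.0}}))  # BICYCLING
print(get_max_mode_from_summary({"distance": {}}))  # INVALID
print(get_max_mode_from_summary(None))              # INVALID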