From f8c740e28eb9f751a9611843cb071e6601522209 Mon Sep 17 00:00:00 2001 From: SteveYangFASTNDE <154364860+SteveYangFASTNDE@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:06:49 -0400 Subject: [PATCH] code revision --- .../code/GPR_locate_rebars.py | 1332 ++++++++--------- .../code/GPR_locate_rebars.py | 666 +++++++++ .../code/GPR_plot.py | 190 +++ .../code/Pavement_thickness_detection.py | 1 + .../code/mig_fk.py | 428 ++++++ .../Pavement_thickness_notebook.ipynb | 45 +- .../Rebar mapping/code/GPR_locate_rebars.py | 2 +- 7 files changed, 1984 insertions(+), 680 deletions(-) create mode 100644 ground-penetrating-radar/Pavement thickness measurement/code/GPR_locate_rebars.py create mode 100644 ground-penetrating-radar/Pavement thickness measurement/code/GPR_plot.py create mode 100644 ground-penetrating-radar/Pavement thickness measurement/code/mig_fk.py diff --git a/ground-penetrating-radar/Cover depth measurement/code/GPR_locate_rebars.py b/ground-penetrating-radar/Cover depth measurement/code/GPR_locate_rebars.py index ff10c48..15bf8a5 100644 --- a/ground-penetrating-radar/Cover depth measurement/code/GPR_locate_rebars.py +++ b/ground-penetrating-radar/Cover depth measurement/code/GPR_locate_rebars.py @@ -1,666 +1,666 @@ -import sys -import math -import pandas as pd -import numpy as np -from sklearn.preprocessing import minmax_scale -from scipy.signal import find_peaks -from scipy.ndimage import uniform_filter1d -from scipy.constants import c as c -import struct -sys.path.append('C:/directory/path/downloaded_py_files/') -import mig_fk -from sklearn.cluster import KMeans -import matplotlib.pyplot as plt -import matplotlib.colors as mcolors - - -def readdzt(filename, minheadsize = 1024, infoareasize = 128): - ''' - Reads DZT files and returns Pandas dataframe. - Additional details about the header format can be found in the GSSI SIR 3000 Manual pg 55 https://www.geophysical.com/wp-content/uploads/2017/10/GSSI-SIR-3000-Manual.pdf - Parameters: - - filename : DZT file name including path. - - minheadsize : The default is 1024. - - infoareasize : The default is 128. - - Returns: - - df1 : GPR data. - - df2 : GPR configuration settings. 
- - ''' - - fid = open(filename,'rb'); - - rh_tag = struct.unpack('h', fid.read(2))[0] # Pos 00 // 0x00ff if header, 0xfnff for old file - rh_data = struct.unpack('h', fid.read(2))[0] # Pos 02 // constant 1024 (obsolete) - rh_nsamp = struct.unpack('h', fid.read(2))[0] # Pos 04 // samples per scan - rh_bits = struct.unpack('h', fid.read(2))[0] # Pos 06 // bits per data word (8 or 16) - rh_zero = struct.unpack('h', fid.read(2))[0] # Pos 08 // Offset (0x80 or 0x8000 depends on rh_bits) - rhf_sps = struct.unpack('f', fid.read(4))[0] # Pos 10 // scans per second - rhf_spm = struct.unpack('f', fid.read(4))[0] # Pos 14 // scans per meter - rhf_mpm = struct.unpack('f', fid.read(4))[0] # Pos 18 // meters per mark - rhf_position = struct.unpack('f', fid.read(4))[0] # Pos 22 // position (ns) - rhf_range = struct.unpack('f', fid.read(4))[0] # Pos 26 // range (ns) - rh_npass = struct.unpack('h', fid.read(2))[0] # Pos 30 // num of passes for 2-D files - rhb_cdt = struct.unpack('f', fid.read(4))[0] # Pos 32 // Creation date & time - rhb_mdt = struct.unpack('f', fid.read(4))[0] # Pos 36 // Last modification date & time - rh_mapOffset = struct.unpack('h', fid.read(2))[0] # Pos 40 // offset to range gain function - rh_mapSize = struct.unpack('h',fid.read(2))[0] # Pos 42 // size of range gain function - rh_text = struct.unpack('h',fid.read(2))[0] # Pos 44 // offset to text - rh_ntext = struct.unpack('h',fid.read(2))[0] # Pos 46 // size of text - rh_proc = struct.unpack('h',fid.read(2))[0] # Pos 48 // offset to processing history - rh_nproc = struct.unpack('h',fid.read(2))[0] # Pos 50 // size of processing history - rh_nchan = struct.unpack('h',fid.read(2))[0] # Pos 52 // number of channels - rhf_espr = struct.unpack('f', fid.read(4))[0] # Pos 54 // average dielectric constant - rhf_top = struct.unpack('f',fid.read(4))[0] # Pos 58 // position in meters - rhf_depth = struct.unpack('f',fid.read(4))[0] # Pos 62 // range in meters - fid.close() - - # Figuring out the datatype - if rh_data < minheadsize: - offset = minheadsize*rh_data - else: - offset = minheadsize*rh_nchan - - if rh_bits == 8: - datatype = 'uint8' # unsigned char - elif rh_bits == 16: - datatype = 'uint16' # unsigned int - elif rh_bits == 32: - datatype = 'int32' - - # Organize the data from the reading - vec = np.fromfile(filename,dtype=datatype) - headlength = offset/(rh_bits/8) - datvec = vec[int(headlength):] - if rh_bits == 8 or rh_bits == 16: - datvec = datvec - (2**rh_bits)/2.0 - data = np.reshape(datvec,[int(len(datvec)/rh_nsamp),rh_nsamp]) - data = np.asmatrix(data) - data = data.transpose() - df1 = pd.DataFrame(data) - - # Save the configurations - config = {'minheadsize': minheadsize, - 'infoareasize': infoareasize, - 'rh_tag': rh_tag, - 'rh_data': rh_data, - 'rh_nsamp': rh_nsamp, - 'rh_bits': rh_bits, - 'rh_zero': rh_zero, - 'rhf_sps': rhf_sps, - 'rhf_spm': rhf_spm, - 'rhf_mpm': rhf_mpm, - 'rhf_position': rhf_position, - 'rhf_range': rhf_range, - 'rh_npass': rh_npass, - 'rhb_cdt': rhb_cdt, - 'rhb_mdt': rhb_mdt, - 'rh_mapOffset': rh_mapOffset, - 'rh_mapSize': rh_mapSize, - 'rh_text': rh_text, - 'rh_ntext': rh_ntext, - 'rh_proc': rh_proc, - 'rh_nproc': rh_nproc, - 'rh_nchan': rh_nchan, - 'rhf_espr': rhf_espr, - 'rhf_top': rhf_top, - 'rhf_depth': rhf_depth, - } - - index_config = ['config'] - df2 = pd.DataFrame(config, index = index_config) - df2 = df2.transpose() - - return df1, df2 - -def save_to_csv(dataframe, directory, filename, include_index=False, include_header=False): - ''' - Saves the Pandas dataframe (df1 or df2) from the 
readdzt function into CSV format. - - Parameters: - - dataframe: df1 or df2. - - directory: directory path to save the files. - - filename: file name. - - include_index: decides including indices or not, default: False. - - include_header: decides including headers or not, default: False. - - ''' - if not directory.endswith('/') and not directory.endswith('\\'): - directory += '/' - - filepath = f"{directory}{filename}.csv" - - # Save dataframe with specified options - dataframe.to_csv(filepath, index=include_index, header=include_header) - -def read_csv(directory): - ''' - Reads the CSV files and convert them as Pandas dataframe (df_1 or df_2). - - Parameters: - - directory: directory path (should be the same with the directory used in the save_to_csv function). - - Returns: - - df_1: GPR scan data. - - df_2: GPR configuration settings. - ''' - if not directory.endswith('/') and not directory.endswith('\\'): - directory += '/' - - filepath_data = f"{directory}{'data'}.csv" - filepath_config = f"{directory}{'config'}.csv" - - # Read dataframe - df_1 = pd.read_csv(filepath_data, header=None) - df_2 = pd.read_csv(filepath_config, index_col=0) - - return df_1, df_2 - -def config_to_variable(df): - ''' - Declares the items in df (GPR configuration settings) as a variable dictionary. - - Parameters: - - df: Pandas DataFrame with index representing variable names and a single column containing the values. - - Returns: - - variables_dict: a dictionary to declare variables. (locals().update(result_variables)) - ''' - # Create an empty dictionary to store variables - variables_dict = {} - - # Allocate variables for config dataframe (df) - for variable_name, variable_value in df.iterrows(): - # Store the variable in the dictionary - variables_dict[variable_name] = variable_value[0] - - # Adjust the wave traveling time as 0 to value (sometimes position has negative value) - if 'rhf_position' in variables_dict and variables_dict['rhf_position'] != 0: - wave_travel_time = variables_dict['rhf_range'] - variables_dict['rhf_position'] - variables_dict['rhf_position'] = 0 - variables_dict['rhf_range'] = wave_travel_time - - # Return the dictionary containing all variables - return variables_dict - -def Interquartile_Range(df_1, min_value=0.10, max_value=0.90, multiplier=1.5): - ''' - This function clips the DataFrame to remove outliers based on the interquartile range (IQR). - - Parameters: - - df_1: Input DataFrame (GPR scan data). - - min_value: The lower quantile value to calculate Q1 (default is 0.10). - - max_value: The upper quantile value to calculate Q3 (default is 0.90). - - multiplier: A multiplier to control the range for defining outliers (default is 1.5). - - Returns: - - clipped_df: DataFrame with outliers clipped based on the calculated bounds. - - ''' - # Calculate the first and third quartiles (Q1 and Q3) - Q1 = df_1.quantile(min_value).min() - Q3 = df_1.quantile(max_value).max() - - # Calculate the interquartile range (IQR) - IQR = Q3 - Q1 - - # Define the lower and upper bounds for clipping - lower_bound = Q1 - multiplier * IQR - upper_bound = Q3 + multiplier * IQR - - # Clip the entire DataFrame using the calculated bounds - clipped_df = df_1.clip(lower=lower_bound, upper=upper_bound) - clipped_df = clipped_df.astype('float64') - return clipped_df - -def data_chunk(df_1, chunk_size=300): - ''' - Splits the input DataFrame along the GPR survey line (x-axis) into chunks. - - Parameters: - - df_1: Input DataFrame containing GPR scan data. 
- - chunk_size: The size of each chunk along the x-axis. Default is 300. - - Returns: - - A list containing full chunks of the DataFrame, each of size chunk_size, - and possibly a last chunk containing the remaining data. - ''' - # Calculate the number of full chunks - num_full_chunks = df_1.shape[1] // chunk_size - - # Split the DataFrame into full chunks along the columns and reset the index - df_full_chunks = [df_1.iloc[:, i * chunk_size:(i + 1) * chunk_size].copy().reset_index(drop=True) for i in range(num_full_chunks)] - - # Calculate the start index of the last chunk - last_chunk_start = num_full_chunks * chunk_size - - # Include the leftover chunk - df_last_chunk = df_1.iloc[:, last_chunk_start:].copy().reset_index(drop=True) - - return df_full_chunks + [df_last_chunk] - -def gain(df, type="exp", alpha=0.2, t0=40, tmax=None): - """ - Apply gain to a specific time frame of the data in a DataFrame. - - Parameters: - - df: Input DataFrame with shape (512, 300). - - type: Type of gain ('exp' for exponential, 'pow' for power). - - alpha: Exponent value for the gain. - - t0: Start time of the gain application. - - tmax: Maximum time for gain application. - - Returns: - - df_g: DataFrame after applying gain. - """ - t = np.arange(df.shape[0], dtype=np.float64) # Assuming time indices are represented by column indices - - # Determine the time indices where the gain is applied - if tmax is not None: - mask = (t >= t0) & (t <= tmax) - else: - mask = (t >= t0) - - # Apply gain based on the specified type to each row in the DataFrame - if type == "exp": - df_g = df.copy() - expont_df = pd.DataFrame(np.exp(alpha * (t[mask] - t0)), index=df_g.loc[mask, :].index) - df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values - - elif type == "pow": - df_g = df.copy() - expont_df = pd.DataFrame(((t[mask] - t0) ** alpha), index=df_g.loc[mask, :].index) - df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values - else: - raise ValueError("Invalid type. Use 'exp' or 'pow'.") - - return df_g - -def dewow(df): - ''' - Dewow to correct the baseline of the wave. - Reference: https://github.com/iannesbitt/readgssi/blob/master/readgssi/functions.py - - Parameters: - - df: DataFrame with signal data. - - Returns: - - dewowed_df: DataFrame with dewowed signal. - - average_predicted_values: Numpy array with average predicted values. - ''' - # Fit the polynomial model for each column and average the predicted values - predicted_values = np.zeros_like(df.values, dtype=float) - - for i, column in enumerate(df.columns): - signal_column = df[column] - model = np.polyfit(range(len(signal_column)), signal_column, 3) - predicted_values[:, i] = np.polyval(model, range(len(signal_column))) - - average_predicted_values = np.mean(predicted_values, axis=1) - - # Apply the filter to each column - dewowed_df = df - average_predicted_values[:, np.newaxis] - - return dewowed_df - -def bgr(ar, win=0): - ''' - Horizontal background removal. It uses the moving average method to cancel out continuous horizontal signal along size of the window. - - Parameters: - - ar: GPR B-scan Pandas dataframe. - - win: window for uniform_filter1d. - - Returns: - - ar_copy: Dataframe after background removal. - ''' - # Make a copy of the input array - ar_copy = ar.copy() - - window = int(win) - - # Perform background removal on the copied array - ar_copy -= uniform_filter1d(ar_copy, size=window, mode='nearest') - - # Return the modified copy - return ar_copy - -def Timezero_mean(df_1, rhf_position, rhf_range): - ''' - Mean time-zero correction. 
- - Parameters: - - df_1 : GPR B-scan Pandas dataframe. - - rhf_position : The starting point for measuring positions in your GPR data (ns). - - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns). - - Returns: - - adjusted_time0data_t : dataframe after time zero correction. - - time0linspace: The discrete time value at the mean time. - - new_rh_nsamp : The number of rows after cut out. - ''' - time0array = [] - # Define time space (n) along with depth dimension - n = np.linspace(rhf_position, rhf_range, df_1.shape[0]) - # from 0 to scans - for i in range(0, df_1.shape[1]): - # find peaks - temp = df_1[i] - temp = minmax_scale(temp, feature_range=(-1, 1)) - peaks, _ = find_peaks(temp, prominence=0.1, distance = 40) - - # index of the peaks - neg_peaks = [] - for peak in peaks: - if peak > 100: - neg_peaks.append(peak) - # time0array contains the indices for time zero - time0array.append(n[neg_peaks[0]] - 0.06) #0.06 is the small gap for showing the first peaks - - # time0linspace is the average time zero index in the time space (n) - time0linspace = next((i for i, value in enumerate(n) if value >= np.mean(time0array)), None) - # cut out indices before the first positive peak - time0data = df_1[time0linspace:-1] if time0linspace is not None else [] - new_rh_nsamp = time0data.shape[0] - - return time0data, time0linspace, new_rh_nsamp - -def Timezero_individual(df_1, rhf_position, rhf_range): - ''' - Scan-by-scan Time-zero correction. - - Parameters: - - df_1 : GPR B-scan Pandas dataframe. - - rhf_position : The starting point for measuring positions in your GPR data (ns). - - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns). - - Returns: - - adjusted_time0data_t : dataframe after time zero correction. - - new_rh_nsamp : The number of rows after cut out. - ''' - - first_peaks_index = [] - time0data_cutout = [] - time0linspace = [] - time0data_reindex = [] - #Define time space (n) along with depth dimension - n = np.linspace(rhf_position, rhf_range, df_1.shape[0]) - #from 0 to scans - for i in range(df_1.shape[1]): - temp = df_1.iloc[:, i] # Access the ith column using iloc - temp = minmax_scale(temp, feature_range=(-1, 1)) - peaks, _ = find_peaks(temp, prominence=0.2, distance=15) - - #first peaks - first_peaks_index.append(peaks[0]) - - #time0linspace is the average time zero index in the time space (n) - time0linspace.append(n[peaks[0]]) - - #time0data_cutout is cut out indices before the first positive peak - time0data_cutout.append(df_1.iloc[:, i][peaks[0]:-1]) - - #new index is for time zeroing based on the 1st positive peak (depth indices are adjusted based on the 1st positive peak) - new_index = np.arange(-peaks[0], -peaks[0] + len(df_1)) - - #reindexed dataframe (without cutting) - df_reindexed = (new_index, np.array(df_1.iloc[:, i])) - #reindexed time0data - time0data_reindex.append(df_reindexed) - - x=[] - y=[] - - #Need to reindex again since the data length and order is not consistant. 
We cut out the uncommon indices - for i in range (0, df_1.shape[1], 1): - x.append(np.arange(0, len(time0data_cutout[i]),1)) - y.append(time0data_cutout[i]) - - #Calculate common index based on max and min value of x - common_range = np.arange(max(map(min, x)), min(map(max, x)) + 1) - adjusted_time0data = [] - - #Append the cut out data in the data frame - for i in range (0, df_1.shape[1], 1): - df_temp = pd.DataFrame({'X': x[i], 'Y': y[i]}) - df_temp_common_range = df_temp[df_temp['X'].isin(common_range)] - adjusted_time0data.append(np.array(df_temp_common_range['Y'])) - adjusted_time0data = pd.DataFrame(adjusted_time0data) - adjusted_time0data_t = adjusted_time0data.transpose() - new_rh_nsamp = adjusted_time0data_t.shape[0] - return adjusted_time0data_t, new_rh_nsamp - -def FK_migration(data, rhf_spm, rhf_sps, rhf_range, rh_nsamp, rhf_espr): - ''' - F-K migration. Recommend changing the dielectric if the migration result is poor. - - Parameters: - - data : GPR B-scan dataframe after time-zero correction. - - rhf_spm : Scans per meter. - - rhf_sps : Scans per second. - - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns). - - rh_nsamp : The number of rows in the DataFrame. - - rhf_espr : Dielectric constant of the media. - - Returns: - - migrated_data : Migrated DataFrame. - - profilePos : Linear space of the x-axis (along the survey line) after migration. - - dt : Time space interval. - - dx : x space interval. - - velocity : Average velocity. - - ''' - # Calculate pos_x and dx based on the scans per second or scans per meter - if rhf_spm != 0: - pos_x, dx = np.linspace(0.0, data.shape[1]/rhf_spm, data.shape[1], retstep=True) - profilePos = pos_x - else: - time_x, dx = np.linspace(0.0, data.shape[1]/rhf_sps, data.shape[1], retstep=True) - profilePos = time_x - - # Calculate the Two-way traveling time and time interval - twtt, dt = np.linspace(0, rhf_range, int(rh_nsamp), retstep=True) - - # Calculate velocity based on the dielectric constant in the configuration (df2) - velocity = (c)/math.sqrt(rhf_espr) * 1e-9 #m/ns - - # Apply F-K migration - migrated_data,twtt,migProfilePos = mig_fk.fkmig(data, dt, dx, velocity) - - # Update Linear space of the x-axis - profilePos = migProfilePos + profilePos[0] - - return migrated_data, profilePos, dt, dx, velocity - - -def Locate_rebar(migrated_data, rhf_depth, rh_nsamp, profilePos, amplitude_threshold = 0.70, depth_threshold = 0.15, num_clusters = 14, random_state = 42, midpoint_factor=0.4): - ''' - Locates rebar positions in migrated data based on specified parameters. (Old version) - - Parameters: - - migrated_data: Input DataFrame containing migrated data. - - rhf_depth: Depth (m). - - rh_nsamp: The number of rows in the DataFrame. - - amplitude_threshold: Threshold for rebar amplitude detection (Gets more points when the value is low and vice versa). - - depth_threshold: Threshold to skip the 1st positive and negative peak (Plot the migrated result first, and determine the value). - - num_clusters: Number of clusters for k-means clustering (Count the white spots in the migrated plot, and put that number here). - - random_state: Random seed for reproducibility in clustering (default is 42). - - midpoint_factor: Controls the contrast of the plot. (Higher value outputs more darker plot and vice versa). - - Returns: - - rebar_positions: DataFrame containing the detected rebar positions. 
- - ''' - - fig, ax = plt.subplots(figsize=(15, 2)) - - # Calculate depth per point and depth axis in inches - depth_per_point = rhf_depth / rh_nsamp - depth_axis = np.linspace(0, depth_per_point * len(migrated_data) * 39.37, len(migrated_data)) - - # Convert survey line axis to inches - survey_line_axis = profilePos * 39.37 - - vmin, vmax = migrated_data.min(), migrated_data.max() - - # Calculate the midpoint based on the provided factor - midpoint = vmin + (vmax - vmin) * midpoint_factor - - cmap = plt.cm.gray - norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax) - - heatmap = ax.imshow(migrated_data, cmap='Greys_r', extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()], norm=norm) - - # Add a colorbar - cbar = plt.colorbar(heatmap, ax=ax) - - # Normalize the data - normalized_migrated_data = minmax_scale(migrated_data, feature_range=(-1, 1)) - - # Create meshgrid of indices - x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0])) - - # Highlight data points with values higher than threshold - threshold = amplitude_threshold - # Skip the first positive peak based on the depth threshold you defined - threshold_index = int(depth_threshold * normalized_migrated_data.shape[0]) - highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)], - y_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)])) - - # Use KMeans clustering to identify representative points - num_clusters = num_clusters - kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(highlighted_points) - - # Get the cluster centers - cluster_centers_m = kmeans.cluster_centers_ - - # Convert cluster centers to inches - cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1], - cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0])) - - # Overlay scatter points at cluster centers - scatter = ax.scatter(cluster_centers_inches_m[:, 0], cluster_centers_inches_m[:, 1], - c='red', marker='o', s=50, edgecolors='black') - - # Set labels for axes - ax.set_xlabel('GPR Survey line (inch)') - ax.set_ylabel('Depth (inch)') - - # Show the plot - return plt.show() - -def custom_minmax_scale(data, new_min, new_max): - ''' - Custom minmax scale function without losing resolution. GPR B-scan amplitude resolution goes bad with scikit-learn minmax scale. - - Parameters: - - data: numpy array. - - new_min: the minimum value in your normalization. - - new_max: the maximum value in your normalization. - - Returns: - - scaled_data: Normalized numpy array. - ''' - min_val = data.min() - max_val = data.max() - scaled_data = ((data - min_val) / (max_val - min_val)) * (new_max - new_min) + new_min - return scaled_data - -def locate_rebar_consecutive(migrated_data, velocity, rhf_range, rh_nsamp, profilePos, - vmin=0.15, vmax=0.7, amplitude_threshold=0.55, depth_threshold=8, - minimal_y_index=10, num_clusters=50, random_state=42, - redundancy_filter=0.6): - ''' - Locates rebar positions in migrated data based on specified parameters. - - Parameters: - - migrated_data: migrated GPR B-scan numpy array . - - velocity: The wave speed in the media. (c)/math.sqrt(rhf_espr) * 1e-9 m/ns - - rhf_range: The time it takes for the radar signals to travel to the subsurface and return (ns). - - rh_nsamp: The number of rows in the GPR B-scan. 
- - profilePos: x axis (survey line axis) after migration. - - vmin, vmax: The parameter used in visualizing B-scans. Controls B-scan amplitude contrast ranges. - - amplitude_threshold: Threshold for rebar amplitude detection. - - depth_threshold: The maximum depth of the bridge in inches. - - minimal_y_index: The initial y-axis index of the B-scan to skip the 1st positive peak from the rebar counting. - - num_clusters: Number of clusters for k-means clustering (Count the white spots in the migrated plot, and put that number here). - - random_state: Random seed for reproducibility in clustering (default is 42). - - midpoint_factor: Controls the contrast of the plot. (Higher value outputs more darker plot and vice versa). - - Returns: - - figure: Scatter rebar points on the migrated B-scan. - - ''' - if rh_nsamp != len(migrated_data): - raise ValueError("Length of migrated_data should be equal to rh_nsamp") - - fig, ax = plt.subplots(figsize=(15, 12)) - # Calculate depth per point and depth axis in inches - depth = (velocity/2) * rhf_range # one way travel (m) - depth_per_point = depth / rh_nsamp # (m) - depth_axis = np.linspace(0, depth_per_point * rh_nsamp * 39.37, rh_nsamp) - - # Convert survey line axis to inches - survey_line_axis = profilePos * 39.37 - - new_min = 0 - new_max = 1 - - # Normalize the data - normalized_migrated_data = custom_minmax_scale(migrated_data, new_min, new_max) - - # Plot the heatmap with custom colormap - cmap = plt.cm.Greys_r - norm = mcolors.Normalize(vmin=vmin, vmax=vmax) - heatmap = ax.imshow(normalized_migrated_data, cmap=cmap, norm=norm, extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()]) - cbar = plt.colorbar(heatmap, ax=ax, shrink=0.5) - # Create meshgrid of indices - x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0])) - threshold = amplitude_threshold # Adjust this threshold based on your data - bridge_depth = int(depth_threshold / (depth_per_point * 39.37)) - highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)], - y_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)])) - - # Use KMeans clustering to identify representative points - num_clusters = num_clusters - kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(highlighted_points) - - # Get the cluster centers - cluster_centers_m = kmeans.cluster_centers_ - - # Convert cluster centers to inches - cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1], - cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0])) - - # Remove points that are very close together and deeper, keeping only the one with the smaller y value - filtered_cluster_centers = [] - for x, y in cluster_centers_inches_m: - close_points = [(x2, y2) for x2, y2 in filtered_cluster_centers if abs(x2 - x) < redundancy_filter] - if close_points: - # There are already points close to this one - if y < min(close_points, key=lambda p: p[1])[1]: - # This point has a smaller y value, replace the existing ones - filtered_cluster_centers = [p for p in filtered_cluster_centers if (abs(p[0] - x) >= redundancy_filter) or (abs(p[1] - y) >= redundancy_filter)] - filtered_cluster_centers.append((x, y)) - else: - # No close points yet, add this one - 
filtered_cluster_centers.append((x, y)) - - filtered_cluster_centers = np.array(filtered_cluster_centers) - - # Overlay scatter points at filtered cluster centers - scatter = ax.scatter(filtered_cluster_centers[:, 0], filtered_cluster_centers[:, 1], - c='red', marker='o', s=30, edgecolors='black') - - # Set labels for axes - ax.set_xlabel('GPR Survey line (inch)', fontsize=20) - ax.set_ylabel('Depth (inch)', fontsize=20) - ax.set_aspect(3) - #ax.set_ylim(15, 0) - cbar.ax.tick_params(labelsize=14) # Adjust the font size as needed - - # Set font size for axis labels and ticks - ax.tick_params(axis='both', which='major', labelsize=16) # Adjust the font size as needed - plt.show() - return cluster_centers_inches_m +import sys +import math +import pandas as pd +import numpy as np +from sklearn.preprocessing import minmax_scale +from scipy.signal import find_peaks +from scipy.ndimage import uniform_filter1d +from scipy.constants import c as c +import struct +sys.path.append('C:/directory/path/downloaded_py_files/') +import mig_fk +from sklearn.cluster import KMeans +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors +import GPR_plot as plot + +def readdzt(filename, minheadsize = 1024, infoareasize = 128): + ''' + Reads DZT files and returns Pandas dataframe. + Additional details about the header format can be found in the GSSI SIR 3000 Manual pg 55 https://www.geophysical.com/wp-content/uploads/2017/10/GSSI-SIR-3000-Manual.pdf + Parameters: + - filename : DZT file name including path. + - minheadsize : The default is 1024. + - infoareasize : The default is 128. + + Returns: + - df1 : GPR data. + - df2 : GPR configuration settings. + + ''' + + fid = open(filename,'rb'); + + rh_tag = struct.unpack('h', fid.read(2))[0] # Pos 00 // 0x00ff if header, 0xfnff for old file + rh_data = struct.unpack('h', fid.read(2))[0] # Pos 02 // constant 1024 (obsolete) + rh_nsamp = struct.unpack('h', fid.read(2))[0] # Pos 04 // samples per scan + rh_bits = struct.unpack('h', fid.read(2))[0] # Pos 06 // bits per data word (8 or 16) + rh_zero = struct.unpack('h', fid.read(2))[0] # Pos 08 // Offset (0x80 or 0x8000 depends on rh_bits) + rhf_sps = struct.unpack('f', fid.read(4))[0] # Pos 10 // scans per second + rhf_spm = struct.unpack('f', fid.read(4))[0] # Pos 14 // scans per meter + rhf_mpm = struct.unpack('f', fid.read(4))[0] # Pos 18 // meters per mark + rhf_position = struct.unpack('f', fid.read(4))[0] # Pos 22 // position (ns) + rhf_range = struct.unpack('f', fid.read(4))[0] # Pos 26 // range (ns) + rh_npass = struct.unpack('h', fid.read(2))[0] # Pos 30 // num of passes for 2-D files + rhb_cdt = struct.unpack('f', fid.read(4))[0] # Pos 32 // Creation date & time + rhb_mdt = struct.unpack('f', fid.read(4))[0] # Pos 36 // Last modification date & time + rh_mapOffset = struct.unpack('h', fid.read(2))[0] # Pos 40 // offset to range gain function + rh_mapSize = struct.unpack('h',fid.read(2))[0] # Pos 42 // size of range gain function + rh_text = struct.unpack('h',fid.read(2))[0] # Pos 44 // offset to text + rh_ntext = struct.unpack('h',fid.read(2))[0] # Pos 46 // size of text + rh_proc = struct.unpack('h',fid.read(2))[0] # Pos 48 // offset to processing history + rh_nproc = struct.unpack('h',fid.read(2))[0] # Pos 50 // size of processing history + rh_nchan = struct.unpack('h',fid.read(2))[0] # Pos 52 // number of channels + rhf_espr = struct.unpack('f', fid.read(4))[0] # Pos 54 // average dielectric constant + rhf_top = struct.unpack('f',fid.read(4))[0] # Pos 58 // position in meters + rhf_depth = 
struct.unpack('f',fid.read(4))[0] # Pos 62 // range in meters + fid.close() + + # Figuring out the datatype + if rh_data < minheadsize: + offset = minheadsize*rh_data + else: + offset = minheadsize*rh_nchan + + if rh_bits == 8: + datatype = 'uint8' # unsigned char + elif rh_bits == 16: + datatype = 'uint16' # unsigned int + elif rh_bits == 32: + datatype = 'int32' + + # Organize the data from the reading + vec = np.fromfile(filename,dtype=datatype) + headlength = offset/(rh_bits/8) + datvec = vec[int(headlength):] + if rh_bits == 8 or rh_bits == 16: + datvec = datvec - (2**rh_bits)/2.0 + data = np.reshape(datvec,[int(len(datvec)/rh_nsamp),rh_nsamp]) + data = np.asmatrix(data) + data = data.transpose() + df1 = pd.DataFrame(data) + + # Save the configurations + config = {'minheadsize': minheadsize, + 'infoareasize': infoareasize, + 'rh_tag': rh_tag, + 'rh_data': rh_data, + 'rh_nsamp': rh_nsamp, + 'rh_bits': rh_bits, + 'rh_zero': rh_zero, + 'rhf_sps': rhf_sps, + 'rhf_spm': rhf_spm, + 'rhf_mpm': rhf_mpm, + 'rhf_position': rhf_position, + 'rhf_range': rhf_range, + 'rh_npass': rh_npass, + 'rhb_cdt': rhb_cdt, + 'rhb_mdt': rhb_mdt, + 'rh_mapOffset': rh_mapOffset, + 'rh_mapSize': rh_mapSize, + 'rh_text': rh_text, + 'rh_ntext': rh_ntext, + 'rh_proc': rh_proc, + 'rh_nproc': rh_nproc, + 'rh_nchan': rh_nchan, + 'rhf_espr': rhf_espr, + 'rhf_top': rhf_top, + 'rhf_depth': rhf_depth, + } + + index_config = ['config'] + df2 = pd.DataFrame(config, index = index_config) + df2 = df2.transpose() + + return df1, df2 + +def save_to_csv(dataframe, directory, filename, include_index=False, include_header=False): + ''' + Saves the Pandas dataframe (df1 or df2) from the readdzt function into CSV format. + + Parameters: + - dataframe: df1 or df2. + - directory: directory path to save the files. + - filename: file name. + - include_index: decides including indices or not, default: False. + - include_header: decides including headers or not, default: False. + + ''' + if not directory.endswith('/') and not directory.endswith('\\'): + directory += '/' + + filepath = f"{directory}{filename}.csv" + + # Save dataframe with specified options + dataframe.to_csv(filepath, index=include_index, header=include_header) + +def read_csv(directory): + ''' + Reads the CSV files and convert them as Pandas dataframe (df_1 or df_2). + + Parameters: + - directory: directory path (should be the same with the directory used in the save_to_csv function). + + Returns: + - df_1: GPR scan data. + - df_2: GPR configuration settings. + ''' + if not directory.endswith('/') and not directory.endswith('\\'): + directory += '/' + + filepath_data = f"{directory}{'data'}.csv" + filepath_config = f"{directory}{'config'}.csv" + + # Read dataframe + df_1 = pd.read_csv(filepath_data, header=None) + df_2 = pd.read_csv(filepath_config, index_col=0) + + return df_1, df_2 + +def config_to_variable(df): + ''' + Declares the items in df (GPR configuration settings) as a variable dictionary. + + Parameters: + - df: Pandas DataFrame with index representing variable names and a single column containing the values. + + Returns: + - variables_dict: a dictionary to declare variables. 
(locals().update(result_variables)) + ''' + # Create an empty dictionary to store variables + variables_dict = {} + + # Allocate variables for config dataframe (df) + for variable_name, variable_value in df.iterrows(): + # Store the variable in the dictionary + variables_dict[variable_name] = variable_value.iloc[0] + + # Adjust the wave traveling time as 0 to value (sometimes position has negative value) + if 'rhf_position' in variables_dict and variables_dict['rhf_position'] != 0: + wave_travel_time = variables_dict['rhf_range'] - variables_dict['rhf_position'] + variables_dict['rhf_position'] = 0 + variables_dict['rhf_range'] = wave_travel_time + + # Return the dictionary containing all variables + return variables_dict + +def Interquartile_Range(df_1, min_value=0.10, max_value=0.90, multiplier=1.5): + ''' + This function clips the DataFrame to remove outliers based on the interquartile range (IQR). + + Parameters: + - df_1: Input DataFrame (GPR scan data). + - min_value: The lower quantile value to calculate Q1 (default is 0.10). + - max_value: The upper quantile value to calculate Q3 (default is 0.90). + - multiplier: A multiplier to control the range for defining outliers (default is 1.5). + + Returns: + - clipped_df: DataFrame with outliers clipped based on the calculated bounds. + + ''' + # Calculate the first and third quartiles (Q1 and Q3) + Q1 = df_1.quantile(min_value).min() + Q3 = df_1.quantile(max_value).max() + + # Calculate the interquartile range (IQR) + IQR = Q3 - Q1 + + # Define the lower and upper bounds for clipping + lower_bound = Q1 - multiplier * IQR + upper_bound = Q3 + multiplier * IQR + + # Clip the entire DataFrame using the calculated bounds + clipped_df = df_1.clip(lower=lower_bound, upper=upper_bound) + clipped_df = clipped_df.astype('float64') + return clipped_df + +def data_chunk(df_1, chunk_size=300): + ''' + Splits the input DataFrame along the GPR survey line (x-axis) into chunks. + + Parameters: + - df_1: Input DataFrame containing GPR scan data. + - chunk_size: The size of each chunk along the x-axis. Default is 300. + + Returns: + - A list containing full chunks of the DataFrame, each of size chunk_size, + and possibly a last chunk containing the remaining data. + ''' + # Calculate the number of full chunks + num_full_chunks = df_1.shape[1] // chunk_size + + # Split the DataFrame into full chunks along the columns and reset the index + df_full_chunks = [df_1.iloc[:, i * chunk_size:(i + 1) * chunk_size].copy().reset_index(drop=True) for i in range(num_full_chunks)] + + # Calculate the start index of the last chunk + last_chunk_start = num_full_chunks * chunk_size + + # Include the leftover chunk + df_last_chunk = df_1.iloc[:, last_chunk_start:].copy().reset_index(drop=True) + + return df_full_chunks + [df_last_chunk] + +def gain(df, type="exp", alpha=0.2, t0=40, tmax=None): + """ + Apply gain to a specific time frame of the data in a DataFrame. + + Parameters: + - df: Input DataFrame with shape (512, 300). + - type: Type of gain ('exp' for exponential, 'pow' for power). + - alpha: Exponent value for the gain. + - t0: Start time of the gain application. + - tmax: Maximum time for gain application. + + Returns: + - df_g: DataFrame after applying gain. 
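+
+    Example (an illustrative sketch with synthetic data; a real B-scan would come from readdzt):
+        >>> import numpy as np, pandas as pd
+        >>> df = pd.DataFrame(np.random.default_rng(0).standard_normal((512, 300)))
+        >>> gain(df, type="exp", alpha=0.05, t0=40).shape
+        (512, 300)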
+ """ + t = np.arange(df.shape[0], dtype=np.float64) # Assuming time indices are represented by column indices + + # Determine the time indices where the gain is applied + if tmax is not None: + mask = (t >= t0) & (t <= tmax) + else: + mask = (t >= t0) + + # Apply gain based on the specified type to each row in the DataFrame + if type == "exp": + df_g = df.copy() + expont_df = pd.DataFrame(np.exp(alpha * (t[mask] - t0)), index=df_g.loc[mask, :].index) + df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values + + elif type == "pow": + df_g = df.copy() + expont_df = pd.DataFrame(((t[mask] - t0) ** alpha), index=df_g.loc[mask, :].index) + df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values + else: + raise ValueError("Invalid type. Use 'exp' or 'pow'.") + + return df_g + +def dewow(df): + ''' + Dewow to correct the baseline of the wave. + Reference: https://github.com/iannesbitt/readgssi/blob/master/readgssi/functions.py + + Parameters: + - df: DataFrame with signal data. + + Returns: + - dewowed_df: DataFrame with dewowed signal. + - average_predicted_values: Numpy array with average predicted values. + ''' + # Fit the polynomial model for each column and average the predicted values + predicted_values = np.zeros_like(df.values, dtype=float) + + for i, column in enumerate(df.columns): + signal_column = df[column] + model = np.polyfit(range(len(signal_column)), signal_column, 3) + predicted_values[:, i] = np.polyval(model, range(len(signal_column))) + + average_predicted_values = np.mean(predicted_values, axis=1) + + # Apply the filter to each column + dewowed_df = df - average_predicted_values[:, np.newaxis] + + return dewowed_df + +def bgr(ar, win=0): + ''' + Horizontal background removal. It uses the moving average method to cancel out continuous horizontal signal along size of the window. + + Parameters: + - ar: GPR B-scan Pandas dataframe. + - win: window for uniform_filter1d. + + Returns: + - ar_copy: Dataframe after background removal. + ''' + # Make a copy of the input array + ar_copy = ar.copy() + + window = int(win) + + # Perform background removal on the copied array + ar_copy -= uniform_filter1d(ar_copy, size=window, mode='nearest') + + # Return the modified copy + return ar_copy + +def Timezero_mean(df_1, rhf_position, rhf_range): + ''' + Mean time-zero correction. + + Parameters: + - df_1 : GPR B-scan Pandas dataframe. + - rhf_position : The starting point for measuring positions in your GPR data (ns). + - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns). + + Returns: + - adjusted_time0data_t : dataframe after time zero correction. + - time0linspace: The discrete time value at the mean time. + - new_rh_nsamp : The number of rows after cut out. 
+    '''
+    time0array = []
+    # Define time space (n) along the depth dimension
+    n = np.linspace(rhf_position, rhf_range, df_1.shape[0])
+    # Loop over scans (columns)
+    for i in range(0, df_1.shape[1]):
+        # Find peaks in the normalized A-scan
+        temp = df_1[i]
+        temp = minmax_scale(temp, feature_range=(-1, 1))
+        peaks, _ = find_peaks(temp, prominence=0.1, distance=40)
+
+        # Keep only peaks beyond sample index 100
+        neg_peaks = []
+        for peak in peaks:
+            if peak > 100:
+                neg_peaks.append(peak)
+        # time0array contains the time values for time zero
+        time0array.append(n[neg_peaks[0]] - 0.06)  # 0.06 is a small margin so the first peak itself is kept
+
+    # time0linspace is the index of the average time zero in the time space (n)
+    time0linspace = next((i for i, value in enumerate(n) if value >= np.mean(time0array)), None)
+    # Cut out the rows before the first positive peak
+    time0data = df_1[time0linspace:-1] if time0linspace is not None else []
+    new_rh_nsamp = time0data.shape[0]
+
+    return time0data, time0linspace, new_rh_nsamp
+
+def Timezero_individual(df_1, rhf_position, rhf_range):
+    '''
+    Scan-by-scan time-zero correction.
+
+    Parameters:
+    - df_1 : GPR B-scan Pandas dataframe.
+    - rhf_position : The starting point for measuring positions in your GPR data (ns).
+    - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns).
+
+    Returns:
+    - adjusted_time0data_t : dataframe after time zero correction.
+    - new_rh_nsamp : The number of rows after the cut-out.
+    '''
+
+    first_peaks_index = []
+    time0data_cutout = []
+    time0linspace = []
+    time0data_reindex = []
+    # Define time space (n) along the depth dimension
+    n = np.linspace(rhf_position, rhf_range, df_1.shape[0])
+    # Loop over scans (columns)
+    for i in range(df_1.shape[1]):
+        temp = df_1.iloc[:, i]  # Access the ith column using iloc
+        temp = minmax_scale(temp, feature_range=(-1, 1))
+        peaks, _ = find_peaks(temp, prominence=0.2, distance=15)
+
+        # First peak of each scan
+        first_peaks_index.append(peaks[0])
+
+        # Time value of the first peak in the time space (n)
+        time0linspace.append(n[peaks[0]])
+
+        # time0data_cutout drops the rows before the first positive peak
+        time0data_cutout.append(df_1.iloc[:, i][peaks[0]:-1])
+
+        # New index for time zeroing based on the 1st positive peak (depth indices are shifted so the peak sits at 0)
+        new_index = np.arange(-peaks[0], -peaks[0] + len(df_1))
+
+        # Re-indexed (index, values) pair, kept without cutting
+        df_reindexed = (new_index, np.array(df_1.iloc[:, i]))
+        time0data_reindex.append(df_reindexed)
+
+    x = []
+    y = []
+
+    # Need to re-index again since the data length and order are not consistent. We cut out the uncommon indices
+    for i in range(df_1.shape[1]):
+        x.append(np.arange(len(time0data_cutout[i])))
+        y.append(time0data_cutout[i])
+
+    # Calculate the common index range based on the max and min values of x
+    common_range = np.arange(max(map(min, x)), min(map(max, x)) + 1)
+    adjusted_time0data = []
+
+    # Append the cut-out data to the data frame
+    for i in range(df_1.shape[1]):
+        df_temp = pd.DataFrame({'X': x[i], 'Y': y[i]})
+        df_temp_common_range = df_temp[df_temp['X'].isin(common_range)]
+        adjusted_time0data.append(np.array(df_temp_common_range['Y']))
+    adjusted_time0data = pd.DataFrame(adjusted_time0data)
+    adjusted_time0data_t = adjusted_time0data.transpose()
+    new_rh_nsamp = adjusted_time0data_t.shape[0]
+    return adjusted_time0data_t, new_rh_nsamp
+
+def FK_migration(data, rhf_spm, rhf_sps, rhf_range, rh_nsamp, rhf_espr):
+    '''
+    F-K migration. Consider adjusting the dielectric constant (rhf_espr) if the migration result is poor.
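+    The migration velocity is derived from the dielectric constant as
+    velocity = c / sqrt(rhf_espr) * 1e-9 (m/ns); for example, rhf_espr = 6.25
+    gives roughly 0.3 / 2.5 = 0.12 m/ns, a typical value for concrete.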
+
+    Parameters:
+    - data : GPR B-scan dataframe after time-zero correction.
+    - rhf_spm : Scans per meter.
+    - rhf_sps : Scans per second.
+    - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp : The number of rows in the DataFrame.
+    - rhf_espr : Dielectric constant of the media.
+
+    Returns:
+    - migrated_data : Migrated DataFrame.
+    - profilePos : Linear space of the x-axis (along the survey line) after migration.
+    - dt : Time step.
+    - dx : x step.
+    - velocity : Average velocity.
+
+    '''
+    # Calculate pos_x and dx based on the scans per meter or scans per second
+    if rhf_spm != 0:
+        pos_x, dx = np.linspace(0.0, data.shape[1]/rhf_spm, data.shape[1], retstep=True)
+        profilePos = pos_x
+    else:
+        time_x, dx = np.linspace(0.0, data.shape[1]/rhf_sps, data.shape[1], retstep=True)
+        profilePos = time_x
+
+    # Calculate the two-way travel time and the time step
+    twtt, dt = np.linspace(0, rhf_range, int(rh_nsamp), retstep=True)
+
+    # Calculate velocity based on the dielectric constant in the configuration (df2)
+    velocity = c / math.sqrt(rhf_espr) * 1e-9  # m/ns
+
+    # Apply F-K migration
+    migrated_data, twtt, migProfilePos = mig_fk.fkmig(data, dt, dx, velocity)
+
+    # Update the linear space of the x-axis
+    profilePos = migProfilePos + profilePos[0]
+
+    return migrated_data, profilePos, dt, dx, velocity
+
+
+def Locate_rebar(migrated_data, rhf_depth, rh_nsamp, profilePos, amplitude_threshold = 0.70, depth_threshold = 0.15, num_clusters = 14, random_state = 42, midpoint_factor=0.4):
+    '''
+    Locates rebar positions in migrated data based on specified parameters. (Old version)
+
+    Parameters:
+    - migrated_data: Input DataFrame containing migrated data.
+    - rhf_depth: Depth (m).
+    - rh_nsamp: The number of rows in the DataFrame.
+    - profilePos: x-axis positions along the survey line (m).
+    - amplitude_threshold: Threshold for rebar amplitude detection (a lower value keeps more candidate points, and vice versa).
+    - depth_threshold: Threshold to skip the 1st positive and negative peak (plot the migrated result first, then choose the value).
+    - num_clusters: Number of clusters for k-means clustering (count the white spots in the migrated plot, and put that number here).
+    - random_state: Random seed for reproducibility in clustering (default is 42).
+    - midpoint_factor: Controls the contrast of the plot (a higher value produces a darker plot, and vice versa).
+
+    Returns:
+    - None. The migrated B-scan is displayed with the detected cluster centers overlaid (the function ends with plt.show()).
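+
+    Example (an illustrative sketch; inputs as returned by FK_migration and readdzt):
+        >>> Locate_rebar(migrated_data, rhf_depth, rh_nsamp, profilePos,
+        ...              amplitude_threshold=0.70, num_clusters=14)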
+
+    '''
+
+    fig, ax = plt.subplots(figsize=(15, 2))
+
+    # Calculate depth per point and the depth axis in inches (39.37 inches per meter)
+    depth_per_point = rhf_depth / rh_nsamp
+    depth_axis = np.linspace(0, depth_per_point * len(migrated_data) * 39.37, len(migrated_data))
+
+    # Convert the survey line axis to inches
+    survey_line_axis = profilePos * 39.37
+
+    vmin, vmax = migrated_data.min(), migrated_data.max()
+
+    # Calculate the midpoint based on the provided factor
+    midpoint = vmin + (vmax - vmin) * midpoint_factor
+
+    cmap = plt.cm.gray
+    norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax)
+
+    heatmap = ax.imshow(migrated_data, cmap='Greys_r', extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()], norm=norm)
+
+    # Add a colorbar
+    cbar = plt.colorbar(heatmap, ax=ax)
+
+    # Normalize the data
+    normalized_migrated_data = minmax_scale(migrated_data, feature_range=(-1, 1))
+
+    # Create a meshgrid of indices
+    x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0]))
+
+    # Highlight data points with values higher than the threshold
+    threshold = amplitude_threshold
+    # Skip the first positive peak based on the depth threshold you defined
+    threshold_index = int(depth_threshold * normalized_migrated_data.shape[0])
+    highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)],
+                                          y_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)]))
+
+    # Use KMeans clustering to identify representative points
+    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(highlighted_points)
+
+    # Get the cluster centers
+    cluster_centers_m = kmeans.cluster_centers_
+
+    # Convert cluster centers to inches
+    cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1],
+                                                cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0]))
+
+    # Overlay scatter points at the cluster centers
+    scatter = ax.scatter(cluster_centers_inches_m[:, 0], cluster_centers_inches_m[:, 1],
+                         c='red', marker='o', s=50, edgecolors='black')
+
+    # Set labels for the axes
+    ax.set_xlabel('GPR Survey line (inch)')
+    ax.set_ylabel('Depth (inch)')
+
+    # Show the plot
+    return plt.show()
+
+def custom_minmax_scale(data, new_min, new_max):
+    '''
+    Custom min-max scaling that preserves amplitude resolution; the scikit-learn
+    minmax_scale call used elsewhere can degrade GPR B-scan amplitude resolution.
+
+    Parameters:
+    - data: numpy array.
+    - new_min: the minimum value of the normalized range.
+    - new_max: the maximum value of the normalized range.
+
+    Returns:
+    - scaled_data: Normalized numpy array.
+    '''
+    min_val = data.min()
+    max_val = data.max()
+    scaled_data = ((data - min_val) / (max_val - min_val)) * (new_max - new_min) + new_min
+    return scaled_data
+
+def locate_rebar_consecutive(migrated_data, velocity, rhf_range, rh_nsamp, profilePos,
+                             vmin=0.15, vmax=0.7, amplitude_threshold=0.55, depth_threshold=8,
+                             minimal_y_index=10, num_clusters=50, random_state=42,
+                             redundancy_filter=0.6):
+    '''
+    Locates rebar positions in migrated data based on specified parameters.
+
+    Parameters:
+    - migrated_data: Migrated GPR B-scan numpy array.
+    - velocity: The wave speed in the medium, c / math.sqrt(rhf_espr) * 1e-9 (m/ns).
+    - rhf_range: The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp: The number of rows in the GPR B-scan.
+    - profilePos: x-axis (survey line axis) after migration.
+    - vmin, vmax: Display-contrast limits for the normalized B-scan.
+    - amplitude_threshold: Threshold for rebar amplitude detection.
+    - depth_threshold: The maximum depth of the bridge in inches.
+    - minimal_y_index: The initial y-axis index of the B-scan, used to skip the 1st positive peak when counting rebar.
+    - num_clusters: Number of clusters for k-means clustering (count the white spots in the migrated plot, and put that number here).
+    - random_state: Random seed for reproducibility in clustering (default is 42).
+    - redundancy_filter: Minimum horizontal spacing (inches) between reported points; of two closer points, only the shallower one is kept.
+
+    Returns:
+    - cluster_centers_inches_m: Cluster centers as (survey position, depth) in inches; the migrated B-scan with the filtered points overlaid is also displayed.
+
+    '''
+    if rh_nsamp != len(migrated_data):
+        raise ValueError("Length of migrated_data should be equal to rh_nsamp")
+
+    fig, ax = plt.subplots(figsize=(15, 12))
+    # Calculate depth per point and the depth axis in inches (39.37 inches per meter)
+    depth = (velocity/2) * rhf_range  # one-way travel (m)
+    depth_per_point = depth / rh_nsamp  # (m)
+    depth_axis = np.linspace(0, depth_per_point * rh_nsamp * 39.37, rh_nsamp)
+
+    # Convert the survey line axis to inches
+    survey_line_axis = profilePos * 39.37
+
+    new_min = 0
+    new_max = 1
+
+    # Normalize the data
+    normalized_migrated_data = custom_minmax_scale(migrated_data, new_min, new_max)
+
+    # Plot the heatmap with a custom colormap
+    cmap = plt.cm.Greys_r
+    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)
+    heatmap = ax.imshow(normalized_migrated_data, cmap=cmap, norm=norm, extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()])
+    cbar = plt.colorbar(heatmap, ax=ax, shrink=0.5)
+    # Create a meshgrid of indices
+    x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0]))
+    threshold = amplitude_threshold  # Adjust this threshold based on your data
+    bridge_depth = int(depth_threshold / (depth_per_point * 39.37))
+    highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)],
+                                          y_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)]))
+
+    # Use KMeans clustering to identify representative points
+    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(highlighted_points)
+
+    # Get the cluster centers
+    cluster_centers_m = kmeans.cluster_centers_
+
+    # Convert cluster centers to inches
+    cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1],
+                                                cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0]))
+
+    # Remove points that are very close together and deeper, keeping only the one with the smaller y value
+    filtered_cluster_centers = []
+    for x, y in cluster_centers_inches_m:
+        close_points = [(x2, y2) for x2, y2 in filtered_cluster_centers if abs(x2 - x) < redundancy_filter]
+        if close_points:
+            # There are already points close to this one
+            if y < min(close_points, key=lambda p: p[1])[1]:
+                # This point has a smaller y value, replace the existing ones
+                filtered_cluster_centers = [p for p in filtered_cluster_centers if (abs(p[0] - x) >= redundancy_filter) or (abs(p[1] - y) >= redundancy_filter)]
+                filtered_cluster_centers.append((x, y))
+        else:
+            # No close points yet, add this one
+
filtered_cluster_centers.append((x, y)) + + filtered_cluster_centers = np.array(filtered_cluster_centers) + + # Overlay scatter points at filtered cluster centers + scatter = ax.scatter(filtered_cluster_centers[:, 0], filtered_cluster_centers[:, 1], + c='red', marker='o', s=30, edgecolors='black') + + # Set labels for axes + ax.set_xlabel('GPR Survey line (inch)', fontsize=20) + ax.set_ylabel('Depth (inch)', fontsize=20) + ax.set_aspect(3) + #ax.set_ylim(15, 0) + cbar.ax.tick_params(labelsize=14) # Adjust the font size as needed + + # Set font size for axis labels and ticks + ax.tick_params(axis='both', which='major', labelsize=16) # Adjust the font size as needed + plt.show() + return cluster_centers_inches_m diff --git a/ground-penetrating-radar/Pavement thickness measurement/code/GPR_locate_rebars.py b/ground-penetrating-radar/Pavement thickness measurement/code/GPR_locate_rebars.py new file mode 100644 index 0000000..15bf8a5 --- /dev/null +++ b/ground-penetrating-radar/Pavement thickness measurement/code/GPR_locate_rebars.py @@ -0,0 +1,666 @@ +import sys +import math +import pandas as pd +import numpy as np +from sklearn.preprocessing import minmax_scale +from scipy.signal import find_peaks +from scipy.ndimage import uniform_filter1d +from scipy.constants import c as c +import struct +sys.path.append('C:/directory/path/downloaded_py_files/') +import mig_fk +from sklearn.cluster import KMeans +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors +import GPR_plot as plot + +def readdzt(filename, minheadsize = 1024, infoareasize = 128): + ''' + Reads DZT files and returns Pandas dataframe. + Additional details about the header format can be found in the GSSI SIR 3000 Manual pg 55 https://www.geophysical.com/wp-content/uploads/2017/10/GSSI-SIR-3000-Manual.pdf + Parameters: + - filename : DZT file name including path. + - minheadsize : The default is 1024. + - infoareasize : The default is 128. + + Returns: + - df1 : GPR data. + - df2 : GPR configuration settings. 
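+
+    Example (illustrative; the path below is a placeholder):
+        >>> df1, df2 = readdzt('C:/directory/path/file.dzt')
+        >>> df1.shape[0] == int(df2.loc['rh_nsamp', 'config'])
+        True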
+ + ''' + + fid = open(filename,'rb'); + + rh_tag = struct.unpack('h', fid.read(2))[0] # Pos 00 // 0x00ff if header, 0xfnff for old file + rh_data = struct.unpack('h', fid.read(2))[0] # Pos 02 // constant 1024 (obsolete) + rh_nsamp = struct.unpack('h', fid.read(2))[0] # Pos 04 // samples per scan + rh_bits = struct.unpack('h', fid.read(2))[0] # Pos 06 // bits per data word (8 or 16) + rh_zero = struct.unpack('h', fid.read(2))[0] # Pos 08 // Offset (0x80 or 0x8000 depends on rh_bits) + rhf_sps = struct.unpack('f', fid.read(4))[0] # Pos 10 // scans per second + rhf_spm = struct.unpack('f', fid.read(4))[0] # Pos 14 // scans per meter + rhf_mpm = struct.unpack('f', fid.read(4))[0] # Pos 18 // meters per mark + rhf_position = struct.unpack('f', fid.read(4))[0] # Pos 22 // position (ns) + rhf_range = struct.unpack('f', fid.read(4))[0] # Pos 26 // range (ns) + rh_npass = struct.unpack('h', fid.read(2))[0] # Pos 30 // num of passes for 2-D files + rhb_cdt = struct.unpack('f', fid.read(4))[0] # Pos 32 // Creation date & time + rhb_mdt = struct.unpack('f', fid.read(4))[0] # Pos 36 // Last modification date & time + rh_mapOffset = struct.unpack('h', fid.read(2))[0] # Pos 40 // offset to range gain function + rh_mapSize = struct.unpack('h',fid.read(2))[0] # Pos 42 // size of range gain function + rh_text = struct.unpack('h',fid.read(2))[0] # Pos 44 // offset to text + rh_ntext = struct.unpack('h',fid.read(2))[0] # Pos 46 // size of text + rh_proc = struct.unpack('h',fid.read(2))[0] # Pos 48 // offset to processing history + rh_nproc = struct.unpack('h',fid.read(2))[0] # Pos 50 // size of processing history + rh_nchan = struct.unpack('h',fid.read(2))[0] # Pos 52 // number of channels + rhf_espr = struct.unpack('f', fid.read(4))[0] # Pos 54 // average dielectric constant + rhf_top = struct.unpack('f',fid.read(4))[0] # Pos 58 // position in meters + rhf_depth = struct.unpack('f',fid.read(4))[0] # Pos 62 // range in meters + fid.close() + + # Figuring out the datatype + if rh_data < minheadsize: + offset = minheadsize*rh_data + else: + offset = minheadsize*rh_nchan + + if rh_bits == 8: + datatype = 'uint8' # unsigned char + elif rh_bits == 16: + datatype = 'uint16' # unsigned int + elif rh_bits == 32: + datatype = 'int32' + + # Organize the data from the reading + vec = np.fromfile(filename,dtype=datatype) + headlength = offset/(rh_bits/8) + datvec = vec[int(headlength):] + if rh_bits == 8 or rh_bits == 16: + datvec = datvec - (2**rh_bits)/2.0 + data = np.reshape(datvec,[int(len(datvec)/rh_nsamp),rh_nsamp]) + data = np.asmatrix(data) + data = data.transpose() + df1 = pd.DataFrame(data) + + # Save the configurations + config = {'minheadsize': minheadsize, + 'infoareasize': infoareasize, + 'rh_tag': rh_tag, + 'rh_data': rh_data, + 'rh_nsamp': rh_nsamp, + 'rh_bits': rh_bits, + 'rh_zero': rh_zero, + 'rhf_sps': rhf_sps, + 'rhf_spm': rhf_spm, + 'rhf_mpm': rhf_mpm, + 'rhf_position': rhf_position, + 'rhf_range': rhf_range, + 'rh_npass': rh_npass, + 'rhb_cdt': rhb_cdt, + 'rhb_mdt': rhb_mdt, + 'rh_mapOffset': rh_mapOffset, + 'rh_mapSize': rh_mapSize, + 'rh_text': rh_text, + 'rh_ntext': rh_ntext, + 'rh_proc': rh_proc, + 'rh_nproc': rh_nproc, + 'rh_nchan': rh_nchan, + 'rhf_espr': rhf_espr, + 'rhf_top': rhf_top, + 'rhf_depth': rhf_depth, + } + + index_config = ['config'] + df2 = pd.DataFrame(config, index = index_config) + df2 = df2.transpose() + + return df1, df2 + +def save_to_csv(dataframe, directory, filename, include_index=False, include_header=False): + ''' + Saves the Pandas dataframe (df1 or df2) from the 
readdzt function into CSV format.
+
+    Parameters:
+    - dataframe: df1 or df2.
+    - directory: directory path to save the files.
+    - filename: file name.
+    - include_index: whether to include the index, default: False.
+    - include_header: whether to include the header, default: False.
+
+    '''
+    if not directory.endswith('/') and not directory.endswith('\\'):
+        directory += '/'
+
+    filepath = f"{directory}{filename}.csv"
+
+    # Save dataframe with specified options
+    dataframe.to_csv(filepath, index=include_index, header=include_header)
+
+def read_csv(directory):
+    '''
+    Reads the CSV files and converts them to Pandas dataframes (df_1 and df_2).
+
+    Parameters:
+    - directory: directory path (should be the same as the directory used in the save_to_csv function).
+
+    Returns:
+    - df_1: GPR scan data.
+    - df_2: GPR configuration settings.
+    '''
+    if not directory.endswith('/') and not directory.endswith('\\'):
+        directory += '/'
+
+    filepath_data = f"{directory}data.csv"
+    filepath_config = f"{directory}config.csv"
+
+    # Read dataframes
+    df_1 = pd.read_csv(filepath_data, header=None)
+    df_2 = pd.read_csv(filepath_config, index_col=0)
+
+    return df_1, df_2
+
+def config_to_variable(df):
+    '''
+    Converts the rows of df (GPR configuration settings) into a dictionary of variables.
+
+    Parameters:
+    - df: Pandas DataFrame with index representing variable names and a single column containing the values.
+
+    Returns:
+    - variables_dict: a dictionary of configuration variables (usable via locals().update(variables_dict)).
+    '''
+    # Create an empty dictionary to store variables
+    variables_dict = {}
+
+    # Allocate variables for config dataframe (df)
+    for variable_name, variable_value in df.iterrows():
+        # Store the variable in the dictionary
+        variables_dict[variable_name] = variable_value.iloc[0]
+
+    # Shift the wave travel-time window so it starts at 0 (rhf_position can be negative)
+    if 'rhf_position' in variables_dict and variables_dict['rhf_position'] != 0:
+        wave_travel_time = variables_dict['rhf_range'] - variables_dict['rhf_position']
+        variables_dict['rhf_position'] = 0
+        variables_dict['rhf_range'] = wave_travel_time
+
+    # Return the dictionary containing all variables
+    return variables_dict
+
+def Interquartile_Range(df_1, min_value=0.10, max_value=0.90, multiplier=1.5):
+    '''
+    Clips the DataFrame to suppress outliers based on the interquartile range (IQR).
+
+    Parameters:
+    - df_1: Input DataFrame (GPR scan data).
+    - min_value: The lower quantile value to calculate Q1 (default is 0.10).
+    - max_value: The upper quantile value to calculate Q3 (default is 0.90).
+    - multiplier: A multiplier to control the range for defining outliers (default is 1.5).
+
+    Returns:
+    - clipped_df: DataFrame with outliers clipped based on the calculated bounds.
+    '''
+    # Calculate the first and third quartiles (Q1 and Q3)
+    Q1 = df_1.quantile(min_value).min()
+    Q3 = df_1.quantile(max_value).max()
+
+    # Calculate the interquartile range (IQR)
+    IQR = Q3 - Q1
+
+    # Define the lower and upper bounds for clipping
+    lower_bound = Q1 - multiplier * IQR
+    upper_bound = Q3 + multiplier * IQR
+
+    # Clip the entire DataFrame using the calculated bounds
+    clipped_df = df_1.clip(lower=lower_bound, upper=upper_bound)
+    clipped_df = clipped_df.astype('float64')
+    return clipped_df
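+
+# Illustrative usage of the I/O and clipping helpers above (not part of the
+# original module; paths are hypothetical placeholders):
+#
+#   df1, df2 = readdzt('C:/data/scan_01.dzt')
+#   save_to_csv(df1, 'C:/data/output/', 'data')
+#   save_to_csv(df2, 'C:/data/output/', 'config', include_index=True, include_header=True)
+#   df_1, df_2 = read_csv('C:/data/output/')
+#   clipped = Interquartile_Range(df_1, min_value=0.10, max_value=0.90, multiplier=1.5)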
+
+def data_chunk(df_1, chunk_size=300):
+    '''
+    Splits the input DataFrame along the GPR survey line (x-axis) into chunks.
+
+    Parameters:
+    - df_1: Input DataFrame containing GPR scan data.
+    - chunk_size: The size of each chunk along the x-axis. Default is 300.
+
+    Returns:
+    - A list containing full chunks of the DataFrame, each of size chunk_size,
+      and possibly a last chunk containing the remaining data.
+    '''
+    # Calculate the number of full chunks
+    num_full_chunks = df_1.shape[1] // chunk_size
+
+    # Split the DataFrame into full chunks along the columns and reset the index
+    df_full_chunks = [df_1.iloc[:, i * chunk_size:(i + 1) * chunk_size].copy().reset_index(drop=True) for i in range(num_full_chunks)]
+
+    # Calculate the start index of the last chunk
+    last_chunk_start = num_full_chunks * chunk_size
+
+    # Include the leftover chunk (skipped when the width divides evenly)
+    df_last_chunk = df_1.iloc[:, last_chunk_start:].copy().reset_index(drop=True)
+
+    return df_full_chunks + ([df_last_chunk] if df_last_chunk.shape[1] > 0 else [])
+
+def gain(df, type="exp", alpha=0.2, t0=40, tmax=None):
+    """
+    Apply gain to a specific time frame of the data in a DataFrame.
+
+    Parameters:
+    - df: Input DataFrame (rows are time samples, columns are scans).
+    - type: Type of gain ('exp' for exponential, 'pow' for power).
+    - alpha: Exponent value for the gain.
+    - t0: Start time of the gain application.
+    - tmax: Maximum time for gain application.
+
+    Returns:
+    - df_g: DataFrame after applying gain.
+    """
+    t = np.arange(df.shape[0], dtype=np.float64) # Time indices are represented by row indices
+
+    # Determine the time indices where the gain is applied
+    if tmax is not None:
+        mask = (t >= t0) & (t <= tmax)
+    else:
+        mask = (t >= t0)
+
+    # Apply gain based on the specified type to each row in the DataFrame
+    if type == "exp":
+        df_g = df.copy()
+        expont_df = pd.DataFrame(np.exp(alpha * (t[mask] - t0)), index=df_g.loc[mask, :].index)
+        df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values
+
+    elif type == "pow":
+        df_g = df.copy()
+        expont_df = pd.DataFrame(((t[mask] - t0) ** alpha), index=df_g.loc[mask, :].index)
+        df_g.loc[mask, :] = df.loc[mask, :] * expont_df.values
+    else:
+        raise ValueError("Invalid type. Use 'exp' or 'pow'.")
+
+    return df_g
+
+def dewow(df):
+    '''
+    Dewow to correct the baseline of the wave.
+    Reference: https://github.com/iannesbitt/readgssi/blob/master/readgssi/functions.py
+
+    Parameters:
+    - df: DataFrame with signal data.
+
+    Returns:
+    - dewowed_df: DataFrame with the dewowed signal.
+    '''
+    # Fit a cubic polynomial to each column and average the predicted values
+    predicted_values = np.zeros_like(df.values, dtype=float)
+
+    for i, column in enumerate(df.columns):
+        signal_column = df[column]
+        model = np.polyfit(range(len(signal_column)), signal_column, 3)
+        predicted_values[:, i] = np.polyval(model, range(len(signal_column)))
+
+    average_predicted_values = np.mean(predicted_values, axis=1)
+
+    # Subtract the average fitted baseline from each column
+    dewowed_df = df - average_predicted_values[:, np.newaxis]
+
+    return dewowed_df
+
+def bgr(ar, win=0):
+    '''
+    Horizontal background removal. Subtracts a moving average over a window of scans to suppress continuous horizontal bands.
+
+    Parameters:
+    - ar: GPR B-scan Pandas dataframe.
+    - win: window for uniform_filter1d.
+
+    Returns:
+    - ar_copy: Dataframe after background removal.
+    '''
+    # Make a copy of the input array
+    ar_copy = ar.copy()
+
+    window = int(win)
+
+    # Perform background removal on the copied array
+    ar_copy -= uniform_filter1d(ar_copy, size=window, mode='nearest')
+
+    # Return the modified copy
+    return ar_copy
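+
+# Illustrative pre-processing chain (not part of the original module; the
+# parameter values are placeholders, not recommendations):
+#
+#   chunks = data_chunk(clipped, chunk_size=300)
+#   gained = gain(chunks[0], type='pow', alpha=0.2, t0=40)
+#   dewowed = dewow(gained)
+#   no_bg = bgr(dewowed, win=100)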
+
+def Timezero_mean(df_1, rhf_position, rhf_range):
+    '''
+    Mean time-zero correction.
+
+    Parameters:
+    - df_1 : GPR B-scan Pandas dataframe.
+    - rhf_position : The starting point for measuring positions in your GPR data (ns).
+    - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns).
+
+    Returns:
+    - time0data : dataframe after time-zero correction.
+    - time0linspace : The index in the time space (n) corresponding to the mean time zero.
+    - new_rh_nsamp : The number of rows after the cut-out.
+    '''
+    time0array = []
+    # Define time space (n) along the depth dimension
+    n = np.linspace(rhf_position, rhf_range, df_1.shape[0])
+    # from 0 to scans
+    for i in range(0, df_1.shape[1]):
+        # find peaks
+        temp = df_1[i]
+        temp = minmax_scale(temp, feature_range=(-1, 1))
+        peaks, _ = find_peaks(temp, prominence=0.1, distance = 40)
+
+        # keep only peaks beyond index 100 to skip the direct wave
+        neg_peaks = []
+        for peak in peaks:
+            if peak > 100:
+                neg_peaks.append(peak)
+        # time0array contains the indices for time zero
+        time0array.append(n[neg_peaks[0]] - 0.06) # 0.06 is a small gap for showing the first peaks
+
+    # time0linspace is the average time zero index in the time space (n)
+    time0linspace = next((i for i, value in enumerate(n) if value >= np.mean(time0array)), None)
+    # cut out indices before the first positive peak
+    time0data = df_1[time0linspace:-1] if time0linspace is not None else []
+    new_rh_nsamp = time0data.shape[0]
+
+    return time0data, time0linspace, new_rh_nsamp
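+
+# Illustrative time-zero correction call (not part of the original module);
+# rhf_position and rhf_range would normally come from config_to_variable(df_2):
+#
+#   time0data, time0linspace, new_rh_nsamp = Timezero_mean(no_bg, rhf_position, rhf_range)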
+
+def Timezero_individual(df_1, rhf_position, rhf_range):
+    '''
+    Scan-by-scan time-zero correction.
+
+    Parameters:
+    - df_1 : GPR B-scan Pandas dataframe.
+    - rhf_position : The starting point for measuring positions in your GPR data (ns).
+    - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns).
+
+    Returns:
+    - adjusted_time0data_t : dataframe after time-zero correction.
+    - new_rh_nsamp : The number of rows after the cut-out.
+    '''
+
+    first_peaks_index = []
+    time0data_cutout = []
+    time0linspace = []
+    time0data_reindex = []
+    # Define time space (n) along the depth dimension
+    n = np.linspace(rhf_position, rhf_range, df_1.shape[0])
+    # from 0 to scans
+    for i in range(df_1.shape[1]):
+        temp = df_1.iloc[:, i] # Access the ith column using iloc
+        temp = minmax_scale(temp, feature_range=(-1, 1))
+        peaks, _ = find_peaks(temp, prominence=0.2, distance=15)
+
+        # first peaks
+        first_peaks_index.append(peaks[0])
+
+        # time0linspace is the average time zero index in the time space (n)
+        time0linspace.append(n[peaks[0]])
+
+        # time0data_cutout is cut out indices before the first positive peak
+        time0data_cutout.append(df_1.iloc[:, i][peaks[0]:-1])
+
+        # new index is for time zeroing based on the 1st positive peak (depth indices are adjusted based on the 1st positive peak)
+        new_index = np.arange(-peaks[0], -peaks[0] + len(df_1))
+
+        # reindexed dataframe (without cutting)
+        df_reindexed = (new_index, np.array(df_1.iloc[:, i]))
+        # reindexed time0data
+        time0data_reindex.append(df_reindexed)
+
+    x = []
+    y = []
+
+    # Need to reindex again since the data length and order is not consistent. We cut out the uncommon indices
+    for i in range(0, df_1.shape[1], 1):
+        x.append(np.arange(0, len(time0data_cutout[i]), 1))
+        y.append(time0data_cutout[i])
+
+    # Calculate common index based on max and min value of x
+    common_range = np.arange(max(map(min, x)), min(map(max, x)) + 1)
+    adjusted_time0data = []
+
+    # Append the cut out data in the data frame
+    for i in range(0, df_1.shape[1], 1):
+        df_temp = pd.DataFrame({'X': x[i], 'Y': y[i]})
+        df_temp_common_range = df_temp[df_temp['X'].isin(common_range)]
+        adjusted_time0data.append(np.array(df_temp_common_range['Y']))
+    adjusted_time0data = pd.DataFrame(adjusted_time0data)
+    adjusted_time0data_t = adjusted_time0data.transpose()
+    new_rh_nsamp = adjusted_time0data_t.shape[0]
+    return adjusted_time0data_t, new_rh_nsamp
+
+def FK_migration(data, rhf_spm, rhf_sps, rhf_range, rh_nsamp, rhf_espr):
+    '''
+    F-K migration. Consider adjusting the dielectric constant (rhf_espr) if the migration result is poor.
+
+    Parameters:
+    - data : GPR B-scan dataframe after time-zero correction.
+    - rhf_spm : Scans per meter.
+    - rhf_sps : Scans per second.
+    - rhf_range : The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp : The number of rows in the DataFrame.
+    - rhf_espr : Dielectric constant of the media.
+
+    Returns:
+    - migrated_data : Migrated DataFrame.
+    - profilePos : Linear space of the x-axis (along the survey line) after migration.
+    - dt : Time space interval.
+    - dx : x space interval.
+    - velocity : Average velocity.
+
+    '''
+    # Calculate pos_x and dx based on the scans per second or scans per meter
+    if rhf_spm != 0:
+        pos_x, dx = np.linspace(0.0, data.shape[1]/rhf_spm, data.shape[1], retstep=True)
+        profilePos = pos_x
+    else:
+        time_x, dx = np.linspace(0.0, data.shape[1]/rhf_sps, data.shape[1], retstep=True)
+        profilePos = time_x
+
+    # Calculate the two-way travel time and time interval
+    twtt, dt = np.linspace(0, rhf_range, int(rh_nsamp), retstep=True)
+
+    # Calculate velocity based on the dielectric constant in the configuration (df2)
+    velocity = (c)/math.sqrt(rhf_espr) * 1e-9 #m/ns
+
+    # Apply F-K migration
+    migrated_data,twtt,migProfilePos = mig_fk.fkmig(data, dt, dx, velocity)
+
+    # Update the linear space of the x-axis
+    profilePos = migProfilePos + profilePos[0]
+
+    return migrated_data, profilePos, dt, dx, velocity
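+
+# Illustrative migration call (not part of the original module); inputs follow
+# the time-zero correction step and the config values read from the DZT header:
+#
+#   migrated_data, profilePos, dt, dx, velocity = FK_migration(
+#       time0data, rhf_spm, rhf_sps, rhf_range, new_rh_nsamp, rhf_espr)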
+
+def Locate_rebar(migrated_data, rhf_depth, rh_nsamp, profilePos, amplitude_threshold = 0.70, depth_threshold = 0.15, num_clusters = 14, random_state = 42, midpoint_factor=0.4):
+    '''
+    Locates rebar positions in migrated data based on specified parameters. (Old version)
+
+    Parameters:
+    - migrated_data: Input DataFrame containing migrated data.
+    - rhf_depth: Depth (m).
+    - rh_nsamp: The number of rows in the DataFrame.
+    - profilePos: x axis (survey line axis) after migration.
+    - amplitude_threshold: Threshold for rebar amplitude detection (detects more points when the value is low and vice versa).
+    - depth_threshold: Threshold to skip the 1st positive and negative peak (plot the migrated result first, then determine the value).
+    - num_clusters: Number of clusters for k-means clustering (count the white spots in the migrated plot, and put that number here).
+    - random_state: Random seed for reproducibility in clustering (default is 42).
+    - midpoint_factor: Controls the contrast of the plot (a higher value outputs a darker plot and vice versa).
+
+    Returns:
+    - None. Shows the migrated B-scan with the detected rebar positions overlaid.
+    '''
+
+    fig, ax = plt.subplots(figsize=(15, 2))
+
+    # Calculate depth per point and depth axis in inches
+    depth_per_point = rhf_depth / rh_nsamp
+    depth_axis = np.linspace(0, depth_per_point * len(migrated_data) * 39.37, len(migrated_data))
+
+    # Convert survey line axis to inches
+    survey_line_axis = profilePos * 39.37
+
+    vmin, vmax = migrated_data.min(), migrated_data.max()
+
+    # Calculate the midpoint based on the provided factor
+    midpoint = vmin + (vmax - vmin) * midpoint_factor
+
+    cmap = plt.cm.gray
+    norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax)
+
+    heatmap = ax.imshow(migrated_data, cmap='Greys_r', extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()], norm=norm)
+
+    # Add a colorbar
+    cbar = plt.colorbar(heatmap, ax=ax)
+
+    # Normalize the data
+    normalized_migrated_data = minmax_scale(migrated_data, feature_range=(-1, 1))
+
+    # Create meshgrid of indices
+    x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0]))
+
+    # Highlight data points with values higher than threshold
+    threshold = amplitude_threshold
+    # Skip the first positive peak based on the depth threshold you defined
+    threshold_index = int(depth_threshold * normalized_migrated_data.shape[0])
+    highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)],
+                                          y_indices[(normalized_migrated_data > threshold) & (y_indices > threshold_index)]))
+
+    # Use KMeans clustering to identify representative points
+    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(highlighted_points)
+
+    # Get the cluster centers
+    cluster_centers_m = kmeans.cluster_centers_
+
+    # Convert cluster centers to inches
+    cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1],
+                                                cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0]))
+
+    # Overlay scatter points at cluster centers
+    scatter = ax.scatter(cluster_centers_inches_m[:, 0], cluster_centers_inches_m[:, 1],
+                         c='red', marker='o', s=50, edgecolors='black')
+
+    # Set labels for axes
+    ax.set_xlabel('GPR Survey line (inch)')
+    ax.set_ylabel('Depth (inch)')
+
+    # Show the plot
+    return plt.show()
+
+def custom_minmax_scale(data, new_min, new_max):
+    '''
+    Custom min-max scale function that preserves resolution; GPR B-scan amplitude resolution degrades with the scikit-learn minmax scale.
+
+    Parameters:
+    - data: numpy array.
+    - new_min: the minimum value in your normalization.
+    - new_max: the maximum value in your normalization.
+
+    Returns:
+    - scaled_data: Normalized numpy array.
+    '''
+    min_val = data.min()
+    max_val = data.max()
+    scaled_data = ((data - min_val) / (max_val - min_val)) * (new_max - new_min) + new_min
+    return scaled_data
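+
+# Quick sanity check for custom_minmax_scale (not part of the original module):
+#
+#   arr = np.array([[0.0, 5.0], [10.0, 20.0]])
+#   scaled = custom_minmax_scale(arr, 0, 1)   # min maps to 0.0, max maps to 1.0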
+
+def locate_rebar_consecutive(migrated_data, velocity, rhf_range, rh_nsamp, profilePos,
+                             vmin=0.15, vmax=0.7, amplitude_threshold=0.55, depth_threshold=8,
+                             minimal_y_index=10, num_clusters=50, random_state=42,
+                             redundancy_filter=0.6):
+    '''
+    Locates rebar positions in migrated data based on specified parameters.
+
+    Parameters:
+    - migrated_data: migrated GPR B-scan numpy array.
+    - velocity: The wave speed in the media, (c)/math.sqrt(rhf_espr) * 1e-9 m/ns.
+    - rhf_range: The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp: The number of rows in the GPR B-scan.
+    - profilePos: x axis (survey line axis) after migration.
+    - vmin, vmax: Visualization parameters that control the B-scan amplitude contrast range.
+    - amplitude_threshold: Threshold for rebar amplitude detection.
+    - depth_threshold: The maximum depth of the bridge in inches.
+    - minimal_y_index: The initial y-axis index of the B-scan, used to skip the 1st positive peak in the rebar counting.
+    - num_clusters: Number of clusters for k-means clustering (count the white spots in the migrated plot, and put that number here).
+    - random_state: Random seed for reproducibility in clustering (default is 42).
+    - redundancy_filter: Horizontal distance (inch) within which nearby cluster centers are merged, keeping the shallower one.
+
+    Returns:
+    - cluster_centers_inches_m: Rebar cluster centers (inches), also scattered on the migrated B-scan.
+    '''
+    if rh_nsamp != len(migrated_data):
+        raise ValueError("Length of migrated_data should be equal to rh_nsamp")
+
+    fig, ax = plt.subplots(figsize=(15, 12))
+    # Calculate depth per point and depth axis in inches
+    depth = (velocity/2) * rhf_range # one way travel (m)
+    depth_per_point = depth / rh_nsamp # (m)
+    depth_axis = np.linspace(0, depth_per_point * rh_nsamp * 39.37, rh_nsamp)
+
+    # Convert survey line axis to inches
+    survey_line_axis = profilePos * 39.37
+
+    new_min = 0
+    new_max = 1
+
+    # Normalize the data
+    normalized_migrated_data = custom_minmax_scale(migrated_data, new_min, new_max)
+
+    # Plot the heatmap with custom colormap
+    cmap = plt.cm.Greys_r
+    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)
+    heatmap = ax.imshow(normalized_migrated_data, cmap=cmap, norm=norm, extent=[survey_line_axis.min(), survey_line_axis.max(), depth_axis.max(), depth_axis.min()])
+    cbar = plt.colorbar(heatmap, ax=ax, shrink=0.5)
+    # Create meshgrid of indices
+    x_indices, y_indices = np.meshgrid(np.arange(normalized_migrated_data.shape[1]), np.arange(normalized_migrated_data.shape[0]))
+    threshold = amplitude_threshold # Adjust this threshold based on your data
+    bridge_depth = int(depth_threshold / (depth_per_point * 39.37))
+    highlighted_points = np.column_stack((x_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)],
+                                          y_indices[(normalized_migrated_data > threshold) & (y_indices < bridge_depth) & (y_indices > minimal_y_index)]))
+
+    # Use KMeans clustering to identify representative points
+    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(highlighted_points)
+
+    # Get the cluster centers
+    cluster_centers_m = kmeans.cluster_centers_
+
+    # Convert cluster centers to inches
+    cluster_centers_inches_m = np.column_stack((cluster_centers_m[:, 0] * survey_line_axis.max() / normalized_migrated_data.shape[1],
+                                                cluster_centers_m[:, 1] * depth_axis.max() / normalized_migrated_data.shape[0]))
+
+    # Remove points that are very close together and deeper, keeping only the one with the smaller y value
+    filtered_cluster_centers = []
+    for x, y in cluster_centers_inches_m:
+        close_points = [(x2, y2) for x2, y2 in filtered_cluster_centers if abs(x2 - x) < redundancy_filter]
+        if close_points:
+            # There are already points close to this one
+            if y < min(close_points, key=lambda p: p[1])[1]:
+                # This point has a smaller y value, replace the existing ones
+                filtered_cluster_centers = [p for p in filtered_cluster_centers if (abs(p[0] - x) >= redundancy_filter) or (abs(p[1] - y) >= redundancy_filter)]
+                filtered_cluster_centers.append((x, y))
+        else:
+            # No close points yet, add this one
+            filtered_cluster_centers.append((x, y))
+
+    filtered_cluster_centers = np.array(filtered_cluster_centers)
+
+    # Overlay scatter points at filtered cluster centers
+    scatter = ax.scatter(filtered_cluster_centers[:, 0], filtered_cluster_centers[:, 1],
+                         c='red', marker='o', s=30, edgecolors='black')
+
+    # Set labels for axes
+    ax.set_xlabel('GPR Survey line (inch)', fontsize=20)
+    ax.set_ylabel('Depth (inch)', fontsize=20)
+    ax.set_aspect(3)
+    #ax.set_ylim(15, 0)
+    cbar.ax.tick_params(labelsize=14) # Adjust the font size as needed
+
+    # Set font size for axis labels and ticks
+    ax.tick_params(axis='both', which='major', labelsize=16) # Adjust the font size as needed
+    plt.show()
+    return cluster_centers_inches_m
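+
+# Illustrative end-to-end rebar localization call (not part of the original
+# module); num_clusters must be tuned to the number of visible reflections:
+#
+#   rebar_centers = locate_rebar_consecutive(
+#       np.asarray(migrated_data), velocity, rhf_range, new_rh_nsamp, profilePos,
+#       amplitude_threshold=0.55, depth_threshold=8, num_clusters=50)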
diff --git a/ground-penetrating-radar/Pavement thickness measurement/code/GPR_plot.py b/ground-penetrating-radar/Pavement thickness measurement/code/GPR_plot.py
new file mode 100644
index 0000000..a738c56
--- /dev/null
+++ b/ground-penetrating-radar/Pavement thickness measurement/code/GPR_plot.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Dec 19 09:30:58 2023
+
+@author: steve.yang.ctr
+"""
+import numpy as np
+from sklearn.preprocessing import minmax_scale
+from scipy.signal import find_peaks
+import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+
+
+def Plot_a_scan(data):
+    '''
+    Plot individual GPR scans (A-scans) with identified peaks.
+
+    Parameters:
+    - data: GPR Pandas dataframe
+    '''
+    for i in range(0, data.shape[1]):
+        temp = data[i]
+        temp = minmax_scale(temp, feature_range=(-1, 1))
+        peaks, _ = find_peaks(temp, prominence=0.1, distance = 40)
+        if i % 150 == 1: # Plot every 150th scan
+            plt.plot(temp)
+            plt.plot(peaks, temp[peaks], 'rx', label='Peaks')
+            plt.ylabel('Value')
+            plt.title('Plot of scan %i' % i)
+            plt.show()
+
+def Plot_b_scan(data):
+    '''
+    Plot a GPR B-scan.
+
+    Parameters:
+    - data: GPR Pandas dataframe
+    '''
+    fig, ax = plt.subplots(figsize=(12, 4))
+    heatmap = ax.imshow(data, cmap='gray', aspect='auto')
+
+    ax.set_ylim(data.shape[0], 0)
+    cbar = plt.colorbar(heatmap)
+    plt.show()
+
+def Plot_b_scan_advanced(data, midpoint_factor=0.4):
+    '''
+    Plot a GPR B-scan with an adjustable colormap midpoint.
+
+    Parameters:
+    - data: GPR Pandas dataframe
+    - midpoint_factor: Factor controlling the midpoint of the colormap.
+    '''
+    fig, ax = plt.subplots(figsize=(15, 6))
+
+    vmin = np.nanmin(data)
+    vmax = np.nanmax(data)
+
+    # Calculate midpoint
+    midpoint = vmin + (vmax - vmin) * midpoint_factor
+
+    cmap = plt.cm.gray
+    norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax)
+
+    heatmap = ax.imshow(data, cmap=cmap, norm=norm, aspect='auto')
+
+    cbar = plt.colorbar(heatmap, ax=ax)
+    ax.set_ylim(data.shape[0], 0)
+    # Set font size for legend
+    cbar.ax.tick_params(labelsize=14) # Adjust the font size as needed
+
+    # Set font size for axis labels and ticks
+    ax.tick_params(axis='both', which='major', labelsize=16) # Adjust the font size as needed
+    plt.show()
+
+def Plot_migrated(data):
+    '''
+    Plot migrated GPR data.
+
+    Parameters:
+    - data: Migrated GPR numpy array
+
+    Returns:
+    None
+    '''
+    fig, ax = plt.subplots(figsize=(12, 4))
+
+    heatmap = ax.imshow(data, cmap='Greys_r', aspect='auto')
+
+    cbar = plt.colorbar(heatmap, ax=ax)
+
+    ax.set_xlabel('GPR Survey line index')
+    ax.set_ylabel('Depth index')
+    plt.show()
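+
+# Illustrative plotting calls (not part of the original module); 'df_1' and
+# 'migrated_data' are assumed to come from the GPR_locate_rebars pipeline:
+#
+#   Plot_b_scan_advanced(df_1, midpoint_factor=0.4)
+#   Plot_migrated(migrated_data)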
+
+def Plot_migrated_advanced(data, profilePos, velocity, rhf_range, rh_nsamp, midpoint_factor=0.4):
+    '''
+    Plot migrated GPR data with an adjustable colormap midpoint and a computed depth axis.
+
+    Parameters:
+    - data: 2D array representing migrated GPR data.
+    - profilePos: x axis (survey line axis) after migration.
+    - velocity: The wave speed in the media, (c)/math.sqrt(rhf_espr) * 1e-9 m/ns.
+    - rhf_range: The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp: The number of rows in the GPR B-scan.
+    - midpoint_factor: Factor controlling the midpoint of the colormap.
+    '''
+    # mean time zero with 1st positive peak cut
+    fig, ax = plt.subplots(figsize=(15, 5)) # Increase the height of the plot
+    depth = (velocity/2) * rhf_range
+    depth_per_point = depth / rh_nsamp
+    depth_axis = np.linspace(0, depth_per_point * len(data) * 39.37, len(data))
+    survey_line_axis = profilePos * 39.37
+
+    vmin, vmax = data.min(), data.max()
+
+    # Calculate the midpoint based on the provided factor
+    midpoint = vmin + (vmax - vmin) * midpoint_factor
+
+    cmap = plt.cm.gray
+    norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax)
+
+    heatmap = ax.imshow(data, cmap='Greys_r', extent=[survey_line_axis.min(), survey_line_axis.max(),
+                                                      depth_axis.max(), depth_axis.min()], norm=norm)
+
+    # Add colorbar for better interpretation
+    cbar = plt.colorbar(heatmap, ax=ax)
+
+    # Add the red zone between y=2.5 and y=3.5 inches with red color and alpha=0.5
+    #ax.axhspan(2.5, 3.5, facecolor='red', alpha=0.5)
+
+    # Set labels for axes
+    ax.set_xlabel('GPR Survey line (inch)')
+    ax.set_ylabel('Depth (inch)')
+
+    # Adjust the aspect ratio to magnify the y-axis
+    ax.set_aspect(5)
+    ax.set_ylim(15, 0)
+    # Show the plot
+    return plt.show()
+
+def Plot_migrated_rebarmaps(data, profilePos, velocity, rhf_range, rh_nsamp, midpoint_factor=0.4):
+    '''
+    Plot migrated GPR data with an adjustable colormap midpoint and a computed depth axis, sized for rebar maps.
+
+    Parameters:
+    - data: 2D array representing migrated GPR data.
+    - profilePos: x axis (survey line axis) after migration.
+    - velocity: The wave speed in the media, (c)/math.sqrt(rhf_espr) * 1e-9 m/ns.
+    - rhf_range: The time it takes for the radar signals to travel to the subsurface and return (ns).
+    - rh_nsamp: The number of rows in the GPR B-scan.
+    - midpoint_factor: Factor controlling the midpoint of the colormap.
+ ''' + # mean time zero with 1st positive peak cut + fig, ax = plt.subplots(figsize=(15, 12)) # Increase the height of the plot + depth = (velocity/2) * rhf_range + depth_per_point = depth / rh_nsamp + depth_axis = np.linspace(0, depth_per_point * len(data) * 39.37, len(data)) + survey_line_axis = profilePos * 39.37 + + vmin, vmax = data.min(), data.max() + + # Calculate the midpoint based on the provided factor + midpoint = vmin + (vmax - vmin) * midpoint_factor + + cmap = plt.cm.gray + norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=midpoint, vmax=vmax) + + heatmap = ax.imshow(data, cmap='Greys_r', extent=[survey_line_axis.min(), survey_line_axis.max(), + depth_axis.max(), depth_axis.min()], norm=norm) + + # Add colorbar for better interpretation + cbar = plt.colorbar(heatmap, ax=ax, shrink=0.5) + + # Add the red zone between y=2.5 and y=3 inches with red color and alpha=0.5 + #ax.axhspan(2.5, 3.5, facecolor='red', alpha=0.5) + + # Set labels for axes + ax.set_xlabel('GPR Survey line (inch)', fontsize=20) + ax.set_ylabel('Depth (inch)', fontsize=20) + cbar.ax.tick_params(labelsize=14) # Adjust the font size as needed + + # Set font size for axis labels and ticks + ax.tick_params(axis='both', which='major', labelsize=16) # Adjust the font size as needed + + # Adjust the aspect ratio to magnify the y-axis + ax.set_aspect(3) + #ax.set_ylim(15, 0) + # Show the plot + return plt.show() \ No newline at end of file diff --git a/ground-penetrating-radar/Pavement thickness measurement/code/Pavement_thickness_detection.py b/ground-penetrating-radar/Pavement thickness measurement/code/Pavement_thickness_detection.py index 42d42f0..cd66bef 100644 --- a/ground-penetrating-radar/Pavement thickness measurement/code/Pavement_thickness_detection.py +++ b/ground-penetrating-radar/Pavement thickness measurement/code/Pavement_thickness_detection.py @@ -10,6 +10,7 @@ from scipy.signal import find_peaks import sys sys.path.append('/Workspace/Users/steve.yang.ctr@dot.gov/Pavement_thickness/') +import GPR_locate_rebars as gpr_lr from matplotlib import pyplot as plt from matplotlib.ticker import MultipleLocator, AutoMinorLocator import matplotlib.colors as mcolors diff --git a/ground-penetrating-radar/Pavement thickness measurement/code/mig_fk.py b/ground-penetrating-radar/Pavement thickness measurement/code/mig_fk.py new file mode 100644 index 0000000..83058ac --- /dev/null +++ b/ground-penetrating-radar/Pavement thickness measurement/code/mig_fk.py @@ -0,0 +1,428 @@ +#! /usr/bin/env python +# +# Implements an FK (Stolt) migration routine. +# +# Dmig, tmig, xmig = fkmig(D, dt, dx, v, params) +# +# D data array +# dt temporal sample rate +# dx spatial sample rate +# v constant migration velocity +# params migration parameters (not yet implemented) +# +# Code translated from CREWES MATLAB algorithm, and similar restrictions +# presumably apply. The original license terms are included below. +# +# BEGIN TERMS OF USE LICENSE +# +# This SOFTWARE is maintained by the CREWES Project at the Department +# of Geology and Geophysics of the University of Calgary, Calgary, +# Alberta, Canada. The copyright and ownership is jointly held by +# its author (identified above) and the CREWES Project. 
The CREWES +# project may be contacted via email at: crewesinfo@crewes.org +# +# The term 'SOFTWARE' refers to the Matlab source code, translations to +# any other computer language, or object code +# +# Terms of use of this SOFTWARE +# +# 1) Use of this SOFTWARE by any for-profit commercial organization is +# expressly forbidden unless said organization is a CREWES Project +# Sponsor. +# +# 2) A CREWES Project sponsor may use this SOFTWARE under the terms of the +# CREWES Project Sponsorship agreement. +# +# 3) A student or employee of a non-profit educational institution may +# use this SOFTWARE subject to the following terms and conditions: +# - this SOFTWARE is for teaching or research purposes only. +# - this SOFTWARE may be distributed to other students or researchers +# provided that these license terms are included. +# - reselling the SOFTWARE, or including it or any portion of it, in any +# software that will be resold is expressly forbidden. +# - transfering the SOFTWARE in any form to a commercial firm or any +# other for-profit organization is expressly forbidden. +# +# + +#from __future__ import print_function +import math +import numpy as np +import pdb, traceback +from tqdm import tqdm + +def csinci(): + """ Complex valued sinc function interpolation. + + trout = csinci(trin, t, tout, sizetable) + """ + +def fktran(D, t, x, ntpad=None, nxpad=None, percent=0., ishift=1): + """ F-K transform using fft on time domain and ifft on space domain. """ + nsamp = D.shape[0] + ntr = D.shape[1] + + if len(t) != nsamp: + raise Exception('Time domain length is inconsistent in input') + if len(x) != ntr: + raise Exception('Space domain length is inconsistent in input') + + if ntpad is None: + ntpad = 2**nextpow2(t) + if nxpad is None: + nxpad = 2**nextpow2(x) + + # Get real values of transform with fftrl + specfx, f = fftrl(D, t, percent, ntpad) + + # Taper and pad in space domain + if percent > 0.: + mw = np.tile(mwindow(ntr, percent), (ntr, 1)) + specfx = specfx * mw + if ntr < nxpad: + ntr = nxpad # this causes ifft to apply the x padding + + spec = np.fft.ifft(specfx.T, n=ntr, axis=0).T + # Compute kx + kxnyq = 1. / (2. * (x[1] - x[0])) + dkx = 2. * kxnyq / ntr + kx = np.hstack([np.arange(0, kxnyq, dkx), np.arange(-kxnyq, 0, dkx)]) + + if ishift: + tmp = zip(kx, spec) + tmp.sort() + kx = [i[0] for i in tmp] + spec = [i[1] for i in tmp] + return spec, f, kx + + +def fftrl(s, t, percent=0.0, n=None): + """ Returns the real part of the forward Fourier transform. """ + # Determine the number of traces in ensemble + l = s.shape[0] + m = s.shape[1] + ntraces = 1 + itr = 0 # transpose flag + if l == 1: + nsamps = m + itr = 1 + s = s.T + elif m == 1: + nsamps = l + else: + nsamps = l + ntraces = m + if nsamps != len(t): + t = t[0] + (t[1] - t[0]) * np.arange(0, nsamps) + if n is None: + n = len(t) + + # Apply the taper + if percent > 0.0: + mw = np.tile(mwindow(nsamps, percent), (ntraces, 1)) + s = s * mw + # Pad s if needed + if nsamps < n: + s = np.vstack([s, np.zeros([n-nsamps, ntraces])]) + nsamps = n + + # Do the transformation + spec = np.fft.fft(s, n=nsamps, axis=0) + spec = spec[:int(n/2)+1, :] # save only positive frequencies + + # Build the frequency vector + fnyq = 1. / (2 * (t[1] - t[0])) + nf = spec.shape[0] + df = 2.0 * fnyq / n + f = df * np.arange(0,nf).T + if itr: + f = f.T + spec = spec.T + return spec, f + +def ifktran(spec, f, kx, nfpad=None, nkpad=None, percent=0.0): + """ Inverse f-k transform. 
+    Arguments:
+        spec      complex valued f-k series
+        f         frequency components for rows of spec
+        kx        wavenumber components for columns of spec
+        nfpad     size to pad spec rows to
+        nkpad     size to pad spec columns to
+        percent   controls cosine taper
+
+    Returns:
+        D         2-d array; one trace per column
+        t         time coordinates for D
+        x         space coordinates for D
+    """
+    nf,nkx = spec.shape
+
+    if len(f) != nf:
+        raise Exception('frequency coordinate vector is wrong size')
+    elif len(kx) != nkx:
+        raise Exception('wavenumber coordinate vector is wrong size')
+
+    if nfpad is None:
+        nfpad = 2**nextpow2(len(f))
+    if nkpad is None:
+        nkpad = 2**nextpow2(len(kx))
+
+    # Determine if kx needs to be wrapped
+    if kx[0] < 0.0:
+        # Looks unwrapped (is this wise?)
+        ind = kx >= 0.0
+        kx = np.hstack([kx[ind], kx[np.arange(ind[0])]])
+        spec = np.hstack([spec[:,ind], spec[:,np.arange(ind[0])]])
+    else:
+        ind = False
+
+    # Taper and pad in kx
+    if percent > 0.0:
+        mw = mwindow(nkx, percent)
+        if ind.any():
+            mw = np.hstack([mw[ind], mw[np.arange(ind[0])]])
+        mw = np.tile(mw, (nf, 1))   # replicate the kx taper across all frequency rows
+        spec = spec * mw
+    if nkx < nkpad:
+        nkx = nkpad
+
+    # Perform the transforms
+    specfx = np.fft.fft(spec, nkx)
+    D, t = ifftrl(specfx, f)
+
+    # Compute x
+    dkx = kx[1] - kx[0]
+    xmax = 1.0 / dkx
+    dx = xmax / nkx
+    x = np.arange(0, xmax, dx)
+    return D, t, x
+
+def ifftrl(spec, f):
+    """ Inverse Fourier transform for real-valued series.
+    Arguments:
+        spec    input spectrum
+        f       input frequency coordinates
+    Returns:
+        r       output trace
+        t       output time vector
+    """
+    m,n = spec.shape    # Will be a problem if spec is 1-dimensional
+    itr = 0
+    if (m == 1) or (n == 1):
+        if m == 1:
+            spec = spec.T
+            itr = 1
+        nsamp = len(spec)
+        ntr = 1
+    else:
+        nsamp = m
+        ntr = n
+
+    # Form the conjugate symmetric complex spectrum expected by ifft
+    # Test for nyquist
+    nyq = 0
+    if (spec[-1] == np.real(spec[-1])).all():
+        nyq = 1
+    if nyq:
+        L1 = np.arange(nsamp)
+        L2 = L1[-2:0:-1]
+    else:
+        L1 = np.arange(nsamp)
+        L2 = L1[-2:0:-1]    # note: same as the Nyquist branch in this translation
+    symspec = np.vstack([spec[L1,:], np.conj(spec[L2,:])])
+    # Transform the array
+    r = (np.fft.ifft(symspec.T)).real.T
+    # Build the time vector
+    n = len(r)
+    df = f[1] - f[0]
+    dt = 1.0 / (n*df)
+    t = dt * np.arange(n).T
+    if itr == 1:
+        r = r.T
+        t = t.T
+    return r, t
+
+def mwindow(n, percent=10.):
+    """ Creates a boxcar window with raised-cosine tapers. """
+    if type(n) is not int and type(n) is not float:
+        n = len(n)
+    # Compute the hanning function
+    if percent > 50. or percent < 0.:
+        raise Exception('Invalid percent in function mwindow (={0})'.format(percent))
+    m = int(2 * math.floor(percent * n / 100.))
+    h = np.hanning(m)
+    return np.hstack([h[:m//2], np.ones([n-m]), h[m//2:]])
+
+def mwhalf(n, percent=10.):
+    """ Half mwindow. """
+    if type(n) is not int and type(n) is not float:
+        n = len(n)
+    # Compute the hanning function
+    if percent > 100. or percent < 0.:
+        raise Exception('Invalid percent in function mwhalf (={0})'.format(percent))
+    m = int(math.floor(percent * n / 100.))
+    h = np.hanning(2*m)
+    return np.hstack([np.ones([n-m]), h[m:0:-1]])
+
+def nextpow2(a):
+    """ Gives the next power of 2 larger than a. 
""" + return np.ceil(np.log(a) / np.log(2)).astype(int) + +def fkmig(D, dt, dx, v, params=None): + + nsamp = D.shape[0] + ntr = D.shape[1] + t = np.arange(0, nsamp) * dt + x = np.arange(0, ntr) * dx + interpolated = True + + fnyq = 1.0 / (2.0*dt) + knyq = 1.0 / (2.0*dx) + tmax = t[-1] + xmax = abs(x[-1]-x[0]) + + # Deal with parameters + if params == None: + fmax = 0.6 * fnyq + fwid = 0.2 * (fnyq - fmax) + dipmax = 85.0 + dipwid = 90.0 - dipmax + tpad = min([0.5 * tmax, abs(tmax / math.cos(math.pi*dipmax / 180.0))]) + xpad = min([0.5 * xmax, xmax / math.sin(math.pi*dipmax / 180.0)]) + padflag = 1 + intflag = 3 + cosflag = 1 + lsinc = 1 + ntable = 25 + mcflag = 0 # Faster, less memory-efficient transform (not implemented) + kpflag = 50.0 + + # Apply padding + # tpad + nsampnew = int(2.0**nextpow2( round((tmax+tpad) / dt + 1.0) )) + tmaxnew = (nsampnew-1)*dt + tnew = np.arange(t[0], tmaxnew+dt, dt) + ntpad = nsampnew-nsamp + D = np.vstack([D,np.zeros([ntpad,ntr])]) + + # xpad + ntrnew = 2**nextpow2( round((xmax+xpad) / dx + 1) ) + xmaxnew = (ntrnew-1)*dx + x[0] + xnew = np.arange(x[0], xmaxnew+dx, dx) + nxpad = ntrnew - ntr + D = np.hstack([D, np.zeros([nsampnew,nxpad])]) + + # Forward f-k transform + fkspec, f, kx = fktran(D, tnew, xnew, nsampnew, ntrnew, 0, 0) + df = f[1] - f[0] + nf = len(f) + + # Compute frequency mask + ifmaxmig = int(round((fmax+fwid) / df + 1.0)) + pct = 100.0 * (fwid / (fmax+fwid)) + fmask = np.hstack([mwhalf(ifmaxmig,pct), np.zeros([nf-ifmaxmig])]) + fmaxmig = (ifmaxmig-1)*df # i.e. fmax+fwid to nearest sample + + # Now loop over wavenumbers + ve = v / 2.0 # exploding reflector velocity + dkz = df / ve + kz = (np.arange(0,len(f)) * dkz).T + kz2 = kz ** 2 + + th1 = dipmax * math.pi / 180.0 + th2 = (dipmax+dipwid) * math.pi / 180.0 + + if th1 == th2: + print("No dip filtering") + + for j,kxi in tqdm(enumerate(kx)): + # Evanescent cut-off + fmin = abs(kxi) * ve + ifmin = int(math.ceil(fmin / df)) + 1 + + # Compute dip mask + if th1 != th2: + # First physical frequency excluding dc + ifbeg = max([ifmin, 1])+1 + # Frequencies to migrate + ifuse = np.arange(ifbeg, ifmaxmig+1) + # if len(ifuse) == 1: # This was before 4/18/2020 + if len(ifuse) <= 1: # Alain changed this on 4/18/2020 + # Special case + dipmask = np.zeros(f.shape) + dipmask[ifuse-1] = 1 + else: + # Physical dips for each frequency + theta = np.arcsin(fmin / f[ifuse]) + # Sample number to begin ramp + if1 = int(round(fmin / (math.sin(th1) * df))) + if1 = max([if1, ifbeg]) + # sample number to end ramp + if2 = int(round(fmin / (math.sin(th2) * df))) + if2 = max([if2, ifbeg]) + # Initialize mask to zeros + dipmask = np.zeros(f.shape) + # Pass these dips + dipmask[if1:nf-1] = 1 + dipmask[if2:if1] = 0.5 + 0.5 * np.cos( + (theta[np.arange(if2, if1) - ifbeg] - th1) + * math.pi / float(th2-th1)) + else: + dipmask = np.ones(f.shape) + + # Apply masks + tmp = fkspec[:, j] * fmask * dipmask + + # Compute f that map to kz + fmap = ve * np.sqrt(kx[j]**2 + kz2) + # Contains one value for each kz giving the frequency + # that maps there to migrate the data + # Many of these frequencies will be far too high + ind = np.vstack(np.nonzero(fmap <= fmaxmig)).T + # ind is an array of indicies of fmap which will always start at 1 + # and end at the highest f to be migrated + + # Now map samples by interpolation + fkspec[:, j] *= 0.0 # initialize output spectrum to zero + if len(ind) != 0: + # Compute cosine scale factor + if cosflag: + if fmap[ind].all() == 0: + scl = np.ones(ind.shape[0]) + li = ind.shape[0] + scl[1:li] = (ve * 
kz[ind[1:li]] / fmap[ind[1:li]])[:,0] + else: + scl = ve * kz[ind] / fmap[ind] + else: + scl = np.ones(ind.shape[0]) + if intflag == 0: + # Nearest neighbour interpolation + ifmap = (fmap[ind] / df).astype(int) + fkspec[ind, j] = (scl.squeeze() \ + * tmp[ifmap.squeeze()]).reshape([-1,1]) + elif intflag == 1: + # Complex sinc interpolation + fkspec[ind, j] = scl \ + * csinci(tmp, f, fmap[ind], np.hstack([lsinc,ntable])) + elif intflag == 2: + # Spline interpolation + # Not implemented + pass + elif intflag == 3: + # Linear interpolation + r_interp = scl.squeeze() \ + * np.interp(fmap[ind], f, tmp.real).squeeze() + j_interp = scl.squeeze() \ + * np.interp(fmap[ind], f, tmp.imag).squeeze() + fkspec[ind, j] = (r_interp + j_interp * 1j).reshape([-1,1]) + + # Inverse transform + Dmig, tmig, xmig = ifktran(fkspec, f, kx) + + # Remove padding, if desired + if padflag: + Dmig = Dmig[:nsamp, :ntr] + tmig = tmig[:nsamp] + xmig = xmig[:ntr] + + return Dmig, tmig, xmig diff --git a/ground-penetrating-radar/Pavement thickness measurement/notebooks/Pavement_thickness_notebook.ipynb b/ground-penetrating-radar/Pavement thickness measurement/notebooks/Pavement_thickness_notebook.ipynb index e87d9f4..2ec17b2 100644 --- a/ground-penetrating-radar/Pavement thickness measurement/notebooks/Pavement_thickness_notebook.ipynb +++ b/ground-penetrating-radar/Pavement thickness measurement/notebooks/Pavement_thickness_notebook.ipynb @@ -2,7 +2,33 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "909ec066-96f1-4015-b463-39273a986d94", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys \n", + "import os\n", + "sys.path.append('C:/Download/path/Goundcoupled GPR')\n", + "import Pavement_thickness_detection as ptd\n", + "import numpy as np\n", + "import pickle\n", + "from scipy.constants import c as c" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { @@ -20,6 +46,7 @@ "name": "stdout", "output_type": "stream", "text": [ + "H:/GPR pavement thickness/Pavement_thickness_measurement_to_Steve/Goundcoupled GPR/ALF GPR Lane 6, 09-21-23\n", "Now reading data from Lane 6 Pass 1\n", "Median (50th percentile): 4.48 inches\n" ] @@ -128,16 +155,8 @@ } ], "source": [ - "import sys \n", - "import os\n", - "sys.path.append('C:/downloaded/path/Goundcoupled GPR')\n", - "import Pavement_thickness_detection as ptd\n", - "import numpy as np\n", - "import pickle\n", - "from scipy.constants import c as c\n", - "\n", "# Input Module\n", - "Project_folder = 'C:/downloaded/path/Goundcoupled GPR'\n", + "Project_folder = 'C:/Download/path/Goundcoupled GPR/'\n", "\n", "with_outliers = True # Uses outlier removal function for measured pavement thickness. 
If set to False, returns originally measured data.\n", "positive_pick = True # If set to False, GPR negative amplitude is used to measure the pavement thickness.\n", @@ -152,7 +171,7 @@ "# Process data for the pavement thickness analysis\n", "with os.scandir(Project_folder) as entries:\n", " for entry in entries:\n", - " if entry.is_dir():\n", + " if entry.is_dir() and not entry.name.startswith('_'):\n", " lane_folder = entry.name\n", " folder_current = os.path.join(Project_folder, lane_folder)\n", "\n", @@ -228,12 +247,12 @@ " # Contour plot\n", " colors = ['red', 'yellow', 'lime', 'deepskyblue'] # color designation\n", " bounds = [3.5, 4, 4.5, 5, 6] # Boundaries for each color\n", - " ptd.contour_plot_No_Transition(Y_fine, X_fine, result_interp_fine, colors, bounds, Lane_num) if no_color_transition == True else ptd.contour_plot_default(Y_fine, X_fine, result_interp_fine, Lane_num)\n" + " ptd.contour_plot_No_Transition(Y_fine, X_fine, result_interp_fine, colors, bounds, Lane_num) if no_color_transition == True else ptd.contour_plot_default(Y_fine, X_fine, result_interp_fine, Lane_num)" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { diff --git a/ground-penetrating-radar/Rebar mapping/code/GPR_locate_rebars.py b/ground-penetrating-radar/Rebar mapping/code/GPR_locate_rebars.py index aafa3a6..15bf8a5 100644 --- a/ground-penetrating-radar/Rebar mapping/code/GPR_locate_rebars.py +++ b/ground-penetrating-radar/Rebar mapping/code/GPR_locate_rebars.py @@ -173,7 +173,7 @@ def config_to_variable(df): # Allocate variables for config dataframe (df) for variable_name, variable_value in df.iterrows(): # Store the variable in the dictionary - variables_dict[variable_name] = variable_value[0] + variables_dict[variable_name] = variable_value.iloc[0] # Adjust the wave traveling time as 0 to value (sometimes position has negative value) if 'rhf_position' in variables_dict and variables_dict['rhf_position'] != 0: