From 18452e3a682108c4dda76e43eeea8075eb3bc5db Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 09:57:26 +0530 Subject: [PATCH 1/8] Create Sample --- examples/Fabric/Sample | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/Fabric/Sample diff --git a/examples/Fabric/Sample b/examples/Fabric/Sample new file mode 100644 index 00000000..5692994f --- /dev/null +++ b/examples/Fabric/Sample @@ -0,0 +1 @@ +print("Fabric Notebook") From ef4e2db99d43d7bef0ab3314d06fd8597e67b763 Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 10:01:32 +0530 Subject: [PATCH 2/8] Add files via upload --- examples/Fabric/Zingg_Notebook.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/Fabric/Zingg_Notebook.ipynb diff --git a/examples/Fabric/Zingg_Notebook.ipynb b/examples/Fabric/Zingg_Notebook.ipynb new file mode 100644 index 00000000..e0007e1a --- /dev/null +++ b/examples/Fabric/Zingg_Notebook.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"code","source":["#abfss://Test@onelake.dfs.fabric.microsoft.com/ZinggData.Lakehouse/Files/data.csv\n","spark.sparkContext.setCheckpointDir(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":6,"statement_ids":[6],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.7727126Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:45.3551064Z","execution_finish_time":"2024-12-12T14:38:46.1554742Z","parent_msg_id":"0568e5f6-3102-476c-9119-1eea357e5f90"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 6, Finished, Available, 
Finished)"},"metadata":{}}],"execution_count":2,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"320825db-e1b4-4106-8f77-d974f59e6fe1"},{"cell_type":"code","source":["pip install zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":7,"statement_ids":[7],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.8919804Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:46.9779028Z","execution_finish_time":"2024-12-12T14:38:59.3086347Z","parent_msg_id":"9a6de53a-f5ed-4655-9341-4c4a7802ffe5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 7, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting zingg\n Downloading zingg-0.4.0-py2.py3-none-any.whl.metadata (933 bytes)\nCollecting py4j==0.10.9 (from zingg)\n Downloading py4j-0.10.9-py2.py3-none-any.whl.metadata (1.3 kB)\nDownloading zingg-0.4.0-py2.py3-none-any.whl (74.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.7/74.7 MB\u001b[0m \u001b[31m43.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.6/198.6 kB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: py4j, zingg\n Attempting uninstall: py4j\n Found existing installation: py4j 0.10.9.7\n Uninstalling py4j-0.10.9.7:\n Successfully uninstalled py4j-0.10.9.7\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\npyspark 3.5.1.5.4.20240407 requires py4j==0.10.9.7, but you have py4j 0.10.9 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed py4j-0.10.9 zingg-0.4.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":3,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d45194dd-f9fa-4522-9b8d-f68390a36cb0"},{"cell_type":"code","source":["spark.sparkContext.getCheckpointDir()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":8,"statement_ids":[8],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.0470709Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:59.8920089Z","execution_finish_time":"2024-12-12T14:39:00.1425377Z","parent_msg_id":"a7a3e48d-4f55-4dcc-94db-21864a32cdab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 8, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"'abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/b2adeefa-d873-4af7-9780-3af8598f5959'"},"metadata":{}}],"execution_count":4,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"735117dc-0f56-491b-a805-a16db331c90d"},{"cell_type":"code","source":["pip show 
zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":9,"statement_ids":[9],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.2324828Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:00.6902784Z","execution_finish_time":"2024-12-12T14:39:04.2406337Z","parent_msg_id":"a041b135-c20d-4db9-9e2b-b8b4718c42dc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 9, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: zingg\r\nVersion: 0.4.0\r\nSummary: Zingg Entity Resolution, Data Mastering and Deduplication\r\nHome-page: https://github.com/zinggAI/zingg\r\nAuthor: Zingg.AI\r\nAuthor-email: sonalgoyal4@gmail.com\r\nLicense: https://github.com/zinggAI/zingg/blob/main/LICENSE\r\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\r\nRequires: py4j\r\nRequired-by: \r\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":5,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"51e5d94a-b1d6-47be-bbf1-98208af1b5d8"},{"cell_type":"code","source":["pip install 
tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":10,"statement_ids":[10],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.3970144Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:04.8223306Z","execution_finish_time":"2024-12-12T14:39:09.8213294Z","parent_msg_id":"c2bb18f4-faa5-4fc2-b94e-0ccd1e2b6af7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 10, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting tabulate\n Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\nDownloading tabulate-0.9.0-py3-none-any.whl (35 kB)\nInstalling collected packages: tabulate\nSuccessfully installed tabulate-0.9.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":6,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a2e77ae6-eeb2-482f-a47e-8c6ed0e7bb59"},{"cell_type":"code","source":["pip show tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":11,"statement_ids":[11],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.5376703Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:10.4269168Z","execution_finish_time":"2024-12-12T14:39:14.5511724Z","parent_msg_id":"0a38f00a-6e32-4871-aec1-99613a3180bd"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 11, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: 
tabulate\nVersion: 0.9.0\nSummary: Pretty-print tabular data\nHome-page: \nAuthor: \nAuthor-email: Sergey Astanin \nLicense: MIT\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\nRequires: \nRequired-by: \nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":7,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ed5c6ed3-40ef-4447-ab75-4a6a898814fe"},{"cell_type":"code","source":["##you can change these to the locations of your choice\n","##these are the only two settings that need to change\n","zinggDir = \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/models\"\n","modelId = \"testModelFebrl\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":12,"statement_ids":[12],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.6769995Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.1044655Z","execution_finish_time":"2024-12-12T14:39:15.354016Z","parent_msg_id":"7344a1f2-936d-4266-9e4f-bd76fd51601b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 12, Finished, Available, Finished)"},"metadata":{}}],"execution_count":8,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"c3b77184-4165-495e-b212-521dadef7125"},{"cell_type":"code","source":["## Define constants\n","MARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/marked/\"\n","UNMARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/unmarked/\"\n","\n","# Fill these with your specific details\n","storage_account = 
\"a1a73dc0-3894-4737-b38c-aa7fea437330\" # Replace with your storage account ID\n","fabric_url = \"dfs.fabric.microsoft.com\"\n","\n","# Updated paths for Microsoft Fabric\n","MARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{MARKED_DIR}\"\n","UNMARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{UNMARKED_DIR}\"\n","\n","## Import necessary libraries\n","import pandas as pd\n","import numpy as np\n","import os\n","import time\n","import uuid\n","from tabulate import tabulate\n","from ipywidgets import widgets, interact, GridspecLayout\n","import base64\n","import pyspark.sql.functions as fn\n","\n","# Import Azure libraries for Fabric\n","from azure.identity import DefaultAzureCredential\n","from azure.storage.filedatalake import DataLakeServiceClient\n","\n","# Zingg libraries\n","from zingg.client import *\n","from zingg.pipes import *\n","\n","# Setup Fabric authentication\n","def get_service_client():\n"," credential = DefaultAzureCredential()\n"," service_client = DataLakeServiceClient(\n"," account_url=f\"https://{storage_account}.dfs.fabric.microsoft.com\",\n"," credential=credential,\n"," )\n"," return service_client\n","\n","service_client = get_service_client()\n","\n","# Function to clean model directories in Fabric\n","def cleanModel():\n"," try:\n"," # Access the file system\n"," file_system_client = service_client.get_file_system_client(file_system=storage_account)\n"," \n"," # Remove marked directory\n"," if file_system_client.get_directory_client(MARKED_DIR).exists():\n"," file_system_client.get_directory_client(MARKED_DIR).delete_directory()\n"," \n"," # Remove unmarked directory\n"," if file_system_client.get_directory_client(UNMARKED_DIR).exists():\n"," file_system_client.get_directory_client(UNMARKED_DIR).delete_directory()\n"," \n"," print(\"Model cleaned successfully.\")\n"," except Exception as e:\n"," print(f\"Error cleaning model: {str(e)}\")\n"," return\n","\n","# Function to assign label to a candidate pair\n","def 
assign_label(candidate_pairs_pd, z_cluster, label):\n"," '''\n"," The purpose of this function is to assign a label to a candidate pair\n"," identified by its z_cluster value. Valid labels include:\n"," 0 - not matched\n"," 1 - matched\n"," 2 - uncertain\n"," '''\n"," # Assign label\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster'] == z_cluster, 'z_isMatch'] = label\n"," return\n","\n","# Function to count labeled pairs\n","def count_labeled_pairs(marked_pd):\n"," '''\n"," The purpose of this function is to count the labeled pairs in the marked folder.\n"," '''\n"," n_total = len(np.unique(marked_pd['z_cluster']))\n"," n_positive = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 1]['z_cluster']))\n"," n_negative = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 0]['z_cluster']))\n","\n"," return n_positive, n_negative, n_total\n","\n","# Setup interactive widget\n","available_labels = {\n"," 'No Match': 0,\n"," 'Match': 1,\n"," 'Uncertain': 2\n","}\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":13,"statement_ids":[13],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.7920676Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.9184099Z","execution_finish_time":"2024-12-12T14:39:16.7144224Z","parent_msg_id":"c47972cc-56fd-46a9-80fe-da0d20234a5d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 13, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/context.py:113: FutureWarning: Deprecated in 3.0.0. 
Use SparkSession.builder.getOrCreate() instead.\n"]}],"execution_count":9,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"fd229c4c-6376-4f4b-89c3-14f78822eef8"},{"cell_type":"code","source":["#build the arguments for zingg\n","args = Arguments()\n","# Set the modelid and the zingg dir. You can use this as is\n","args.setModelId(modelId)\n","args.setZinggDir(zinggDir)\n","print(args)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":14,"statement_ids":[14],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.916886Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:17.2999881Z","execution_finish_time":"2024-12-12T14:39:17.5431547Z","parent_msg_id":"c783d3fd-b7fa-4591-9771-32d42753ddd9"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 14, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\n"]}],"execution_count":10,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"f92fe414-811a-4e02-b11e-9711539d1786"},{"cell_type":"code","source":["# Import pandas\n","import pandas as pd\n","\n","# Define the schema (optional for validation)\n","schema = [\"id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", \"ssn\"]\n","\n","# Load the CSV file\n","data = pd.read_csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\")\n","\n","# Ensure column names match the schema\n","data.columns = schema # Adjust only if the file's column names differ\n","\n","# Display the 
data\n","data.head()\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":15,"statement_ids":[15],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.0524493Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:18.126005Z","execution_finish_time":"2024-12-12T14:39:19.6523511Z","parent_msg_id":"619a3f46-252d-4b59-849e-69081583ed29"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 15, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":37,"data":{"text/plain":" id fname lname stNo add1 add2 \\\n0 rec-1021-dup-0 thomas george 1 mcmanus place stoney creek \n1 rec-1021-org thomas george 1 mcmanus place north turramurra \n2 rec-1022-dup-0 jackson eglinton 840 fowles street mountview \n3 rec-1022-dup-1 jackson eglinton 840 fowles street moun tjiew \n4 rec-1022-dup-2 jackson eglinton 840 fowles street mou nview \n\n city state dob ssn \n0 3130 sa 19630225 5460534 \n1 3130 sa 19630225 5460534 \n2 2803 sa 19830807 2932837 \n3 2830 sa 19830807 2932837 \n4 2830 sa 19830807 2932837 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idfnamelnamestNoadd1add2citystatedobssn
0rec-1021-dup-0thomasgeorge1mcmanus placestoney creek3130sa196302255460534
1rec-1021-orgthomasgeorge1mcmanus placenorth turramurra3130sa196302255460534
2rec-1022-dup-0jacksoneglinton840fowles streetmountview2803sa198308072932837
3rec-1022-dup-1jacksoneglinton840fowles streetmoun tjiew2830sa198308072932837
4rec-1022-dup-2jacksoneglinton840fowles streetmou nview2830sa198308072932837
\n
"},"metadata":{}}],"execution_count":11,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a76f4324-ff22-46e1-81b5-16f97ab2835d"},{"cell_type":"code","source":["schema = \"rec_id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, dob string, ssn string\"\n","inputPipe = CsvPipe(\"testFebrl\", \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\", schema)\n","\n","args.setData(inputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":16,"statement_ids":[16],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.2025787Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:20.2434395Z","execution_finish_time":"2024-12-12T14:39:20.4955338Z","parent_msg_id":"5c8d332f-c5a9-4782-8aa7-923604a75d86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 16, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["set schema \n"]}],"execution_count":12,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d9ed37ff-f408-4f87-bda0-161ad35946fb"},{"cell_type":"code","source":["#setting outputpipe in 'args'\n","outputPipe = CsvPipe(\"resultOutput\", 
\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")\n","args.setOutput(outputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":17,"statement_ids":[17],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.3319598Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.0521349Z","execution_finish_time":"2024-12-12T14:39:21.3077047Z","parent_msg_id":"edd9e63e-2f5a-41f8-aec9-be73e860542d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 17, Finished, Available, Finished)"},"metadata":{}}],"execution_count":13,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"3c49f24d-2f15-43e6-8c73-7b77c1199845"},{"cell_type":"code","source":["# Set field definitions\n","rec_id = FieldDefinition(\"rec_id\", \"string\", MatchType.EXACT) # ID should use exact match\n","fname = FieldDefinition(\"fname\", \"string\", MatchType.FUZZY) # First Name\n","lname = FieldDefinition(\"lname\", \"string\", MatchType.FUZZY) # Last Name\n","stNo = FieldDefinition(\"stNo\", \"string\", MatchType.FUZZY) # Street Number\n","add1 = FieldDefinition(\"add1\", \"string\", MatchType.FUZZY) # Address Line 1\n","add2 = FieldDefinition(\"add2\", \"string\", MatchType.FUZZY) # Address Line 2\n","city = FieldDefinition(\"city\", \"string\", MatchType.FUZZY) # City\n","state = FieldDefinition(\"state\", \"string\", MatchType.FUZZY) # State\n","dob = FieldDefinition(\"dob\", \"string\", MatchType.EXACT) # Date of Birth (prefer exact match)\n","ssn = FieldDefinition(\"ssn\", \"string\", MatchType.EXACT) # SSN (should use exact match)\n","\n","# Create the field definitions list\n","fieldDefs = [rec_id, fname, lname, stNo, add1, 
add2, city, state, dob, ssn]\n","\n","# Set field definitions in args\n","args.setFieldDefinition(fieldDefs)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":18,"statement_ids":[18],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.4720722Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.8641221Z","execution_finish_time":"2024-12-12T14:39:22.1346071Z","parent_msg_id":"71227dea-6926-4e14-9e66-501b8515fa5a"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 18, Finished, Available, Finished)"},"metadata":{}}],"execution_count":14,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"76edaab7-d705-4d05-adaa-298b48f87ae6"},{"cell_type":"code","source":["# The numPartitions define how data is split across the cluster. 
\n","# Please change the following as per your data and cluster size by referring to the docs.\n","\n","args.setNumPartitions(4)\n","args.setLabelDataSampleSize(0.5)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":19,"statement_ids":[19],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.5771016Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:22.6870105Z","execution_finish_time":"2024-12-12T14:39:23.1094802Z","parent_msg_id":"133bf47a-3e2c-4a69-b874-b68bd3fd0f94"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 19, Finished, Available, Finished)"},"metadata":{}}],"execution_count":15,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ea3a596e-0571-4149-9b5b-d8357226d90c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"findTrainingData\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","print(args)\n","print(options)\n","print(zingg)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":20,"statement_ids":[20],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.7720589Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:23.6806377Z","execution_finish_time":"2024-12-12T14:39:40.4666332Z","parent_msg_id":"88db0a89-5777-4e74-92c3-15e9a461056f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 20, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'findTrainingData']\narguments for client options are ['--phase', 'findTrainingData', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n\n\n\n"]}],"execution_count":16,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"92238689-3e1c-4b32-9802-c59c714aa6d2"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"label\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.init()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":21,"statement_ids":[21],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.8921439Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.0118438Z","execution_finish_time":"2024-12-12T14:39:41.2588634Z","parent_msg_id":"9f835445-3575-444e-be68-698c87047cfa"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 21, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'label']\narguments for client options are ['--phase', 'label', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":17,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"b30911c2-9663-4260-8952-c9e5e0d668ea"},{"cell_type":"code","source":["# get candidate pairs\n","candidate_pairs_pd = getPandasDfFromDs(zingg.getUnmarkedRecords())\n"," \n","# if no candidate pairs, run job and 
wait\n","if candidate_pairs_pd.shape[0] == 0:\n"," print('No unlabeled candidate pairs found. Run findTraining job ...')\n","\n","else:\n"," # get list of pairs (as identified by z_cluster) to label \n"," z_clusters = list(np.unique(candidate_pairs_pd['z_cluster'])) \n","\n"," # identify last reviewed cluster\n"," last_z_cluster = '' # none yet\n","\n"," # print candidate pair stats\n"," print('{0} candidate pairs found for labeling'.format(len(z_clusters)))"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":22,"statement_ids":[22],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.1173535Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.8216531Z","execution_finish_time":"2024-12-12T14:39:44.3102558Z","parent_msg_id":"6d386eec-27ed-4ac8-8c59-e45bcfa62cc5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 22, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["15 candidate pairs found for labeling\n"]}],"execution_count":18,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e303305a-e747-4807-a788-beecde020545"},{"cell_type":"code","source":["# Label Training Set\n","\n","# define variable to avoid duplicate saves\n","ready_for_save = False\n","print(candidate_pairs_pd)\n","\n","# user-friendly labels and corresponding zingg numerical value\n","# (the order in the dictionary affects how displayed below)\n","LABELS = {\n"," 'Uncertain':2,\n"," 'Match':1,\n"," 'No Match':0 \n"," }\n","\n","# GET CANDIDATE PAIRS\n","# ========================================================\n","#candidate_pairs_pd = get_candidate_pairs()\n","n_pairs = 
int(candidate_pairs_pd.shape[0]/2)\n","# ========================================================\n","\n","# DEFINE IPYWIDGET DISPLAY\n","# ========================================================\n","display_pd = candidate_pairs_pd.drop(\n"," labels=[\n"," 'z_zid', 'z_prediction', 'z_score', 'z_isMatch', 'z_zsource'\n"," ], \n"," axis=1)\n","\n","# define header to be used with each displayed pair\n","html_prefix = \"

\"\n","html_suffix = \"

\"\n","header = widgets.HTML(value=f\"{html_prefix}\" + \"
\".join([str(i)+\"  \" for i in display_pd.columns.to_list()]) + f\"
{html_suffix}\")\n","\n","# initialize display\n","vContainers = []\n","vContainers.append(widgets.HTML(value=f'

Indicate if each of the {n_pairs} record pairs is a match or not

'))\n","\n","# for each set of pairs\n","for n in range(n_pairs):\n","\n"," # get candidate records\n"," candidate_left = display_pd.loc[2*n].to_list()\n"," print(candidate_left)\n"," candidate_right = display_pd.loc[(2*n)+1].to_list()\n"," print(candidate_right)\n","\n"," # define grid to hold values\n"," html = ''\n","\n"," for i in range(display_pd.shape[1]):\n","\n"," # get column name\n"," column_name = display_pd.columns[i]\n","\n"," # if field is image\n"," if column_name == 'image_path':\n","\n"," # define row header\n"," html += ''\n"," html += 'image'\n","\n"," # read left image to encoded string\n"," l_endcode = ''\n"," if candidate_left[i] != '':\n"," with open(candidate_left[i], \"rb\") as l_file:\n"," l_encode = base64.b64encode( l_file.read() ).decode()\n","\n"," # read right image to encoded string\n"," r_encode = ''\n"," if candidate_right[i] != '':\n"," with open(candidate_right[i], \"rb\") as r_file:\n"," r_encode = base64.b64encode( r_file.read() ).decode() \n","\n"," # present images\n"," html += f''\n"," html += f''\n"," html += ''\n","\n"," elif column_name != 'image_path': # display text values\n","\n"," if column_name == 'z_cluster': z_cluster = candidate_left[i]\n","\n"," html += ''\n"," html += f'{column_name}'\n"," html += f'{str(candidate_left[i])}'\n"," html += f'{str(candidate_right[i])}'\n"," html += ''\n","\n"," # insert data table\n"," table = widgets.HTML(value=f''+html+'
')\n"," z_cluster = None\n","\n"," # assign label options to pair\n"," label = widgets.ToggleButtons(\n"," options=LABELS.keys(), \n"," button_style='info'\n"," )\n","\n"," # define blank line between displayed pair and next\n"," blankLine=widgets.HTML(value='
')\n","\n"," # append pair, label and blank line to widget structure\n"," vContainers.append(widgets.VBox(children=[table, label, blankLine]))\n","\n","# present widget\n","display(widgets.VBox(children=vContainers))\n","# ========================================================\n","\n","# mark flag to allow save \n","ready_for_save = True\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":23,"statement_ids":[23],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.2971586Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:44.8516182Z","execution_finish_time":"2024-12-12T14:39:45.7453958Z","parent_msg_id":"f4eac308-98ad-4ac2-b881-a6f991545aca"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 23, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":[" z_zid z_cluster z_prediction z_score z_isMatch rec_id \\\n0 34 1734014375837:0 -1.0 0.0 -1 rec-1022-dup-1 \n1 17 1734014375837:0 -1.0 0.0 -1 rec-1029-dup-1 \n2 56 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n3 26 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n4 47 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n5 17 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n6 59 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n7 29 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n8 32 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n9 2 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n10 33 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n11 3 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n12 41 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n13 11 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n14 57 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n15 27 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n16 47 1734014375837:8 -1.0 0.0 -1 rec-1029-dup-1 \n17 34 1734014375837:8 -1.0 0.0 -1 rec-1022-dup-1 \n18 46 
1734007288465:0 -1.0 0.0 -1 rec-1029-dup-0 \n19 24 1734007288465:0 -1.0 0.0 -1 rec-1031-dup-0 \n20 48 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n21 18 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n22 24 1734007288465:12 -1.0 0.0 -1 rec-1031-dup-0 \n23 1 1734007288465:12 -1.0 0.0 -1 rec-1021-dup-0 \n24 37 1734007288465:3 -1.0 0.0 -1 rec-1022-dup-4 \n25 20 1734007288465:3 -1.0 0.0 -1 rec-1029-dup-4 \n26 53 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n27 23 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n28 46 1734007288465:8 -1.0 0.0 -1 rec-1029-dup-0 \n29 1 1734007288465:8 -1.0 0.0 -1 rec-1021-dup-0 \n\n fname lname stNo add1 add2 \\\n0 jackson eglinton 840 fowles street moun tjiew \n1 sachin stephenson 81 rose scott circuit cordoba manor \n2 brooklyn naar-caftenas 210 duffy street tourist park \n3 brooklyn naar-caftenas 210 duffy street tourist park \n4 sachin stephenson 81 rose scott circuit cordoba manor \n5 sachin stephenson 81 rose scott circuit cordoba manor \n6 jasmine chang 210 magnolia drive sunset valley \n7 jasmine chang 210 magnolia drive sunset valley \n8 thomas george 1 mcmanus place north turramurra \n9 thomas george 1 mcmanus place north turramurra \n10 jackson eglinton 840 fowles street mountview \n11 jackson eglinton 840 fowles street mountview \n12 xani green 2 phill ip avenue abbey green \n13 xani green 2 phill ip avenue abbey green \n14 zachary mccarthy 134 teal street greenwood \n15 zachary mccarthy 134 teal street greenwood \n16 sachin stephenson 81 rose scott circuit cordoba manor \n17 jackson eglinton 840 fowles street moun tjiew \n18 kylee stephenson 81 rose scott circuit cordoba anor \n19 samantha sabieray 68 quandong street wattle brae \n20 annalise stephenson 81 rose scott circuit cordoba manor \n21 annalise stephenson 81 rose scott circuit cordoba manor \n22 samantha sabieray 68 quandong street wattle brae \n23 thomas george 1 mcmanus place stoney creek \n24 jackson eglinton 840 fowles street mountv iew \n25 kylee stephenson 81 rose scott 
circuit cordoba manor \n26 emma crossman 53 mcdowall place kellhaven \n27 emma crossman 53 mcdowall place kellhaven \n28 kylee stephenson 81 rose scott circuit cordoba anor \n29 thomas george 1 mcmanus place stoney creek \n\n city state dob ssn z_zsource \n0 2830 sa 19830807 2932837 testFebrl \n1 4226 vic 19461101 4783085 testFebrl \n2 2481 nsw 19840802 3624304 testFebrl \n3 2481 nsw 19840802 3624304 testFebrl \n4 4226 vic 19461101 4783085 testFebrl \n5 4226 vic 19461101 4783085 testFebrl \n6 3021 vic 19930203 4562381 testFebrl \n7 3021 vic 19930203 4562381 testFebrl \n8 3130 sa 19630225 5460534 testFebrl \n9 3130 sa 19630225 5460534 testFebrl \n10 2803 sa 19830807 2932837 testFebrl \n11 2803 sa 19830807 2932837 testFebrl \n12 5108 nsw 19390410 9201057 testFebrl \n13 5108 nsw 19390410 9201057 testFebrl \n14 6024 wa 19860219 3241102 testFebrl \n15 6024 wa 19860219 3241102 testFebrl \n16 4226 vic 19461101 4783085 testFebrl \n17 2830 sa 19830807 2932837 testFebrl \n18 4226 vic 19461101 4783085 testFebrl \n19 4019 wa 19590807 2863290 testFebrl \n20 4226 vic 19461101 4783085 testFebrl \n21 4226 vic 19461101 4783085 testFebrl \n22 4019 wa 19590807 2863290 testFebrl \n23 3130 sa 19630225 5460534 testFebrl \n24 2830 sa 19830807 2932837 testFebrl \n25 4226 vic 19461101 4783085 testFebrl \n26 5608 vic 19391027 3561186 testFebrl \n27 5608 vic 19391027 3561186 testFebrl \n28 4226 vic 19461101 4783085 testFebrl \n29 3130 sa 19630225 5460534 testFebrl \n['1734014375837:0', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734014375837:0', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:1', 'rec-1032-dup-0', ' brooklyn', ' naar-caftenas', ' 210', ' duffy street', ' tourist park', ' 2481', ' nsw', ' 19840802', ' 3624304']\n['1734014375837:1', 'rec-1032-dup-0', 'brooklyn', 'naar-caftenas', '210', 'duffy street', 
'tourist park', '2481', 'nsw', '19840802', '3624304']\n['1734014375837:12', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:12', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:16', 'rec-1034-org', ' jasmine', ' chang', ' 210', ' magnolia drive', ' sunset valley', ' 3021', ' vic', ' 19930203', ' 4562381']\n['1734014375837:16', 'rec-1034-org', 'jasmine', 'chang', '210', 'magnolia drive', 'sunset valley', '3021', 'vic', '19930203', '4562381']\n['1734014375837:2', 'rec-1021-org', ' thomas', ' george', ' 1', ' mcmanus place', ' north turramurra', ' 3130', ' sa', ' 19630225', ' 5460534']\n['1734014375837:2', 'rec-1021-org', 'thomas', 'george', '1', 'mcmanus place', 'north turramurra', '3130', 'sa', '19630225', '5460534']\n['1734014375837:3', 'rec-1022-dup-0', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountview', ' 2803', ' sa', ' 19830807', ' 2932837']\n['1734014375837:3', 'rec-1022-dup-0', 'jackson', 'eglinton', '840', 'fowles street', 'mountview', '2803', 'sa', '19830807', '2932837']\n['1734014375837:4', 'rec-1026-dup-0', ' xani', ' green', ' 2', ' phill ip avenue', ' abbey green', ' 5108', ' nsw', ' 19390410', ' 9201057']\n['1734014375837:4', 'rec-1026-dup-0', 'xani', 'green', '2', 'phill ip avenue', 'abbey green', '5108', 'nsw', '19390410', '9201057']\n['1734014375837:7', 'rec-1033-org', ' zachary', ' mccarthy', ' 134', ' teal street', ' greenwood', ' 6024', ' wa', ' 19860219', ' 3241102']\n['1734014375837:7', 'rec-1033-org', 'zachary', 'mccarthy', '134', 'teal street', 'greenwood', '6024', 'wa', '19860219', '3241102']\n['1734014375837:8', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:8', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' 
moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:0', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:0', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:1', 'rec-1029-dup-2', ' annalise', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:1', 'rec-1029-dup-2', 'annalise', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:12', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:12', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n['1734007288465:3', 'rec-1022-dup-4', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountv iew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:3', 'rec-1029-dup-4', 'kylee', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:4', 'rec-1031-org', ' emma', ' crossman', ' 53', ' mcdowall place', ' kellhaven', ' 5608', ' vic', ' 19391027', ' 3561186']\n['1734007288465:4', 'rec-1031-org', 'emma', 'crossman', '53', 'mcdowall place', 'kellhaven', '5608', 'vic', '19391027', '3561186']\n['1734007288465:8', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:8', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n"]},{"output_type":"display_data","data":{"text/plain":"VBox(children=(HTML(value='

Indicate if each of the 15 record pairs is a match or not

'), VBox(chil…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"01ee458406bc4bc7aae55eb99c0b504b"}},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":24,"statement_ids":[24],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:07.0951338Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:07.7673389Z","execution_finish_time":"2024-12-12T14:40:08.7466527Z","parent_msg_id":"bdc81fed-0318-4c1e-9a05-c19863f74f86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 24, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":25,"statement_ids":[25],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:11.2518685Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:11.8231998Z","execution_finish_time":"2024-12-12T14:40:12.0645572Z","parent_msg_id":"875bd6d4-812c-4287-89ec-65b08d5b15f7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 25, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":26,"statement_ids":[26],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:18.2988145Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:18.8789311Z","execution_finish_time":"2024-12-12T14:40:19.1201871Z","parent_msg_id":"5db081fe-5e88-4519-a2c6-fcc370fbfafc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 
26, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":27,"statement_ids":[27],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:42.2210094Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:42.7984267Z","execution_finish_time":"2024-12-12T14:40:43.0525888Z","parent_msg_id":"048f0931-0eaf-4be3-ae1f-cbd4c06d2e9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 27, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":28,"statement_ids":[28],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:43.7678985Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:44.3138165Z","execution_finish_time":"2024-12-12T14:40:44.5580052Z","parent_msg_id":"462f3847-e026-4744-9b81-4435f1c8ad9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 28, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":29,"statement_ids":[29],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:55.8774777Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:56.4326849Z","execution_finish_time":"2024-12-12T14:40:56.7235357Z","parent_msg_id":"16b1eb37-22d6-440f-85ff-57c744336e9f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 29, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":30,"statement_ids":[30],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:03.1431734Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:03.6780666Z","execution_finish_time":"2024-12-12T14:41:03.9184142Z","parent_msg_id":"08566780-4456-4005-be13-646d0df8ca23"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 30, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":31,"statement_ids":[31],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:12.9413749Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:13.5109925Z","execution_finish_time":"2024-12-12T14:41:13.7677758Z","parent_msg_id":"37011b0e-d098-4aa2-b74b-9f7ed8e5092f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 31, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":32,"statement_ids":[32],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:23.0819227Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:23.7271973Z","execution_finish_time":"2024-12-12T14:41:23.9748964Z","parent_msg_id":"00b11703-7206-4822-8eeb-ea326f892b1e"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 32, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":33,"statement_ids":[33],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:31.7381977Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:32.2866112Z","execution_finish_time":"2024-12-12T14:41:32.5342842Z","parent_msg_id":"65cbb945-0a65-4942-bfaa-233cbc4641ee"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 33, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":34,"statement_ids":[34],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:39.941469Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:40.5983996Z","execution_finish_time":"2024-12-12T14:41:40.848122Z","parent_msg_id":"0f447c56-a165-436a-b7a1-7d5096f3f966"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 34, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":35,"statement_ids":[35],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:51.2539429Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:51.8238466Z","execution_finish_time":"2024-12-12T14:41:52.075655Z","parent_msg_id":"09ec44eb-26ef-4d82-b198-22ab624c9ecc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 35, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":36,"statement_ids":[36],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:02.26967Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:02.8636434Z","execution_finish_time":"2024-12-12T14:42:03.1209762Z","parent_msg_id":"d701ef7e-6c03-4f6f-bccc-3d1dd11d246c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 36, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":37,"statement_ids":[37],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:11.285235Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:11.8311926Z","execution_finish_time":"2024-12-12T14:42:12.0650602Z","parent_msg_id":"d3820343-a606-479d-bcfe-9c1da6f2a104"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 37, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":38,"statement_ids":[38],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:20.7858335Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:21.3273077Z","execution_finish_time":"2024-12-12T14:42:21.6218612Z","parent_msg_id":"744f8a1d-0658-4fe8-ba1a-c225cb1f2bf7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 38, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":39,"statement_ids":[39],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:30.8794009Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:31.4177187Z","execution_finish_time":"2024-12-12T14:42:31.6735656Z","parent_msg_id":"34e08c99-8c30-4af2-8fae-fe81e0f51e1b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 39, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":40,"statement_ids":[40],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:41.3482104Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:41.8980878Z","execution_finish_time":"2024-12-12T14:42:42.1374491Z","parent_msg_id":"3daf28a4-fbc8-4efd-a361-7cb4a2d489b4"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 40, Finished, Available, 
Finished)"},"metadata":{}}],"execution_count":19,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"2fbe3b6c-9a71-4c3f-8cd6-af6eedad956c"},{"cell_type":"code","source":["notebookutils.fs.ls(\"/\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":5,"statement_ids":[5],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:37:55.2180433Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:05.3684078Z","execution_finish_time":"2024-12-12T14:38:08.0399328Z","parent_msg_id":"340db6fd-15b9-49e4-b8d4-124a4cc2f05d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 5, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":7,"data":{"text/plain":"[FileInfo(path=abfss://e803987a-98b6-445f-815c-3d15c2c46877@onelake.dfs.fabric.microsoft.com/36ef8bc2-c67a-4512-b060-e25489729c71, name=36ef8bc2-c67a-4512-b060-e25489729c71, size=0)]"},"metadata":{}}],"execution_count":1,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"77417f1d-c2a6-4160-9b9c-12b0fbee5839"},{"cell_type":"code","source":["if not ready_for_save:\n"," print('No labels have been assigned. 
Run the previous cell to create candidate pairs and assign labels to them before re-running this cell.')\n","\n","else:\n","\n"," # ASSIGN LABEL VALUE TO CANDIDATE PAIRS IN DATAFRAME\n"," # ========================================================\n"," # for each pair in displayed widget\n"," for pair in vContainers[1:]:\n","\n"," # get pair and assigned label\n"," html_content = pair.children[1].get_interact_value() # the displayed pair as html\n"," user_assigned_label = pair.children[1].get_interact_value() # the assigned label\n","\n"," # extract candidate pair id from html pair content\n"," start = pair.children[0].value.find('data-title=\"')\n"," if start > 0: \n"," start += len('data-title=\"') \n"," end = pair.children[0].value.find('\"', start+2)\n"," pair_id = pair.children[0].value[start:end]\n","\n","\n","\n"," # assign label to candidate pair entry in dataframe\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster']==pair_id, 'z_isMatch'] = LABELS.get(user_assigned_label)\n"," # ========================================================\n","\n"," # SAVE LABELED DATA TO ZINGG FOLDER\n"," # ========================================================\n"," # make target directory if needed\n"," notebookutils.fs.mkdirs(MARKED_DIR)\n"," \n"," # save label assignments\n"," # save labels\n"," zingg.writeLabelledOutputFromPandas(candidate_pairs_pd,args)\n","\n"," # count labels accumulated\n"," marked_pd_df = getPandasDfFromDs(zingg.getMarkedRecords())\n"," n_pos, n_neg, n_tot = count_labeled_pairs(marked_pd_df)\n"," print(f'You have accumulated {n_pos} pairs labeled as positive matches.')\n"," print(f'You have accumulated {n_neg} pairs labeled as not matches.')\n"," print(\"If you need more pairs to label, re-run the cell for 'findTrainingData'\")\n"," # ======================================================== \n","\n"," # save completed\n"," ready_for_save = 
False"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":41,"statement_ids":[41],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:43:16.772682Z","session_start_time":null,"execution_start_time":"2024-12-12T14:43:17.381583Z","execution_finish_time":"2024-12-12T14:43:31.9046383Z","parent_msg_id":"ed09275a-e109-4cb1-802d-3909c879a2ad"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 41, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py:147: UserWarning: DataFrame constructor is internal. Do not directly use it.\n warnings.warn(\"DataFrame constructor is internal. Do not directly use it.\")\n"]},{"output_type":"stream","name":"stdout","text":["You have accumulated 9 pairs labeled as positive matches.\nYou have accumulated 6 pairs labeled as not matches.\nIf you need more pairs to label, re-run the cell for 'findTrainingData'\n"]}],"execution_count":20,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9795bb7f-cd3e-41c5-98fd-6341129df8e3"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"trainMatch\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, 
options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":42,"statement_ids":[42],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:49:47.2575582Z","session_start_time":null,"execution_start_time":"2024-12-12T14:49:47.8553896Z","execution_finish_time":"2024-12-12T14:51:37.5141836Z","parent_msg_id":"f77d784e-0276-440c-8113-c6d060096abf"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 42, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'trainMatch']\narguments for client options are ['--phase', 'trainMatch', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":21,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"71928547-bc82-4653-960f-6c376524f651"},{"cell_type":"code","source":["outputDF = spark.read.csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/part-00000-d624fac4-b80c-4f8d-aebc-5d5faf351b8f-c000.csv\")\n","\n","colNames = [\"z_minScore\", \"z_maxScore\", \"z_cluster\", \"rec_id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", 
\"ssn\"]\n","outputDF.toDF(*colNames).show(100)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":47,"statement_ids":[47],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:05:16.9588841Z","session_start_time":null,"execution_start_time":"2024-12-12T15:05:17.7549538Z","execution_finish_time":"2024-12-12T15:05:19.4042746Z","parent_msg_id":"f45225e4-62b8-4836-b7d8-bf0d91575730"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 47, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n| z_minScore| z_maxScore|z_cluster| rec_id| fname| lname|stNo| add1| add2|city|state| dob| ssn|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n|0.9999999999995524|0.9999999999995524| 26|rec-1032-dup-0|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999995358|0.9999999999995358| 24|rec-1031-dup-0|samantha| sabieray| 68| quandong street| wattle brae|4019| wa|19590807|2863290|\n|0.9999999977273273|0.9999999977273273| 2| rec-1021-org| thomas| george| 1| mcmanus place|north turramurra|3130| sa|19630225|5460534|\n|0.9999999999997746|0.9999999999997746| 15| rec-1028-org|eglinton| NULL| 24| curriecrescent| woorniyan|3749| qld|19180205|9341716|\n|0.9999999999991117|0.9999999999991117| 18|rec-1029-dup-2|annalise| stephenson| 81|rose scott circuit| cordoba manor|4226| vic|19461101|4783085|\n|0.9999999999991869|0.9999999999991869| 29| rec-1034-org| jasmine| chang| 210| magnolia drive| sunset valley|3021| 
vic|19930203|4562381|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999988902|0.9999999999988902| 3|rec-1022-dup-0| jackson| eglinton| 840| fowles street| mountview|2803| sa|19830807|2932837|\n|0.9999999999994619|0.9999999999994619| 19|rec-1029-dup-3| kylee| turale| 81| cordoba manor| ashfield|4226| vic|19461101|4783085|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999969422861|0.9999999969422861| 1|rec-1021-dup-0| thomas| george| 1| mcmanus place| stoney creek|3130| sa|19630225|5460534|\n|0.9999999999990814|0.9999999999990814| 8| rec-1023-org| gianni| matson| 701| willis street| boonoobloo|3101| vic|19410111|2540080|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999994932|0.9999999999994932| 23| rec-1031-org| emma| crossman| 53| mcdowall place| kellhaven|5608| vic|19391027|3561186|\n|0.9999999999995524|0.9999999999995524| 25| rec-1032-org|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999973147|0.9999999999973147| 5|rec-1022-dup-2| jackson| eglinton| 840| fowles street| mou nview|2830| sa|19830807|2932837|\n|0.9999999999991869|0.9999999999991869| 28|rec-1034-dup-0| jasmine| chang| 210| magnolia drive| sunset valley|3021| vic|19930203|4562381|\n|0.9999999988648708|0.9999999988648708| 0| rec-1020-org| blake| ryan| 4| starling place| berkeley vlge|5412| 
nsw|19271027|2402765|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n\n"]}],"execution_count":26,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"383bac89-e461-431f-ba14-5ab59941942c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"generateDocs\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":48,"statement_ids":[48],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:06:42.854029Z","session_start_time":null,"execution_start_time":"2024-12-12T15:06:43.5186144Z","execution_finish_time":"2024-12-12T15:06:46.2120472Z","parent_msg_id":"f73996c7-08d7-4621-b654-4975b23615ab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 48, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'generateDocs']\narguments for client options are ['--phase', 'generateDocs', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":27,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"da00dc40-2163-4247-bfef-21fa918ddfdd"},{"cell_type":"code","source":["DOCS_DIR = zinggDir + \"/\" + modelId + 
\"/docs/\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":50,"statement_ids":[50],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:24.1740612Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:24.7585436Z","execution_finish_time":"2024-12-12T15:11:25.0621234Z","parent_msg_id":"808875a7-ca97-42ba-b75c-ea92d72410a5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 50, Finished, Available, Finished)"},"metadata":{}}],"execution_count":29,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0d4e3074-53a5-44a0-9b48-8f0f76a7c950"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"model.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":51,"statement_ids":[51],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:35.8141287Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:36.3540639Z","execution_finish_time":"2024-12-12T15:11:36.652124Z","parent_msg_id":"81153656-b2b8-4430-bc2a-d385f917e9a2"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 51, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n Zingg Model Documentation\n \n\n\n\n

\n \n\t \n\t\t \t\n\t\t\t\t\n\t\t \t\n\t \n
Unmarked 0/15, Marked 15/15 (9 Matches, 6 Non-Matches, 0 Unsure)
\n

\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Cluster z_score z_isMatch rec_id fname lname stNo add1 add2 city state dob ssn z_zsource
\n 1734007288465:0\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:1\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-2\n\n \n \n annalise\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-2\n\n \n \nannalise\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:12\n \n0\n\n \n \n0\n\n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:3\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-4\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountv iew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-4\n\n \n \nkylee\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:4\n \n0\n\n \n \n1\n\n \n \nrec-1031-org\n\n \n \n emma\n\n \n \n crossman\n\n \n \n 53\n\n \n \n mcdowall place\n\n \n \n kellhaven\n\n \n \n 5608\n\n \n \n vic\n\n \n \n 19391027\n\n \n \n 3561186\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-org\n\n \n \nemma\n\n \n \ncrossman\n\n \n \n53\n\n \n \nmcdowall place\n\n \n \nkellhaven\n\n \n \n5608\n\n \n \nvic\n\n \n \n19391027\n\n \n \n3561186\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:0\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:1\n \n0\n\n \n \n1\n\n \n \nrec-1032-dup-0\n\n \n \nbrooklyn\n\n \n \nnaar-caftenas\n\n \n \n210\n\n \n \nduffy street\n\n \n \ntourist park\n\n \n \n2481\n\n \n \nnsw\n\n \n \n19840802\n\n \n \n3624304\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1032-dup-0\n\n \n \n brooklyn\n\n \n \n naar-caftenas\n\n \n \n 210\n\n \n \n duffy street\n\n \n \n tourist park\n\n \n \n 2481\n\n \n \n nsw\n\n \n \n 19840802\n\n \n \n 3624304\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:12\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:16\n \n0\n\n \n \n1\n\n \n \nrec-1034-org\n\n \n \n jasmine\n\n \n \n chang\n\n \n \n 210\n\n \n \n magnolia drive\n\n \n \n sunset valley\n\n \n \n 3021\n\n \n \n vic\n\n \n \n 19930203\n\n \n \n 4562381\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1034-org\n\n \n \njasmine\n\n \n \nchang\n\n \n \n210\n\n \n \nmagnolia drive\n\n \n \nsunset valley\n\n \n \n3021\n\n \n \nvic\n\n \n \n19930203\n\n \n \n4562381\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:2\n \n0\n\n \n \n1\n\n \n \nrec-1021-org\n\n \n \n thomas\n\n \n \n george\n\n \n \n 1\n\n \n \n mcmanus place\n\n \n \n north turramurra\n\n \n \n 3130\n\n \n \n sa\n\n \n \n 19630225\n\n \n \n 5460534\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-org\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nnorth turramurra\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:3\n \n0\n\n \n \n1\n\n \n \nrec-1022-dup-0\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountview\n\n \n \n 2803\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-0\n\n \n \njackson\n\n \n \neglinton\n\n \n \n840\n\n \n \nfowles street\n\n \n \nmountview\n\n \n \n2803\n\n \n \nsa\n\n \n \n19830807\n\n \n \n2932837\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:4\n \n0\n\n \n \n1\n\n \n \nrec-1026-dup-0\n\n \n \n xani\n\n \n \n green\n\n \n \n 2\n\n \n \n phill ip avenue\n\n \n \n abbey green\n\n \n \n 5108\n\n \n \n nsw\n\n \n \n 19390410\n\n \n \n 9201057\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1026-dup-0\n\n \n \nxani\n\n \n \ngreen\n\n \n \n2\n\n \n \nphill ip avenue\n\n \n \nabbey green\n\n \n \n5108\n\n \n \nnsw\n\n \n \n19390410\n\n \n \n9201057\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:7\n \n0\n\n \n \n1\n\n \n \nrec-1033-org\n\n \n \n zachary\n\n \n \n mccarthy\n\n \n \n 134\n\n \n \n teal street\n\n \n \n greenwood\n\n \n \n 6024\n\n \n \n wa\n\n \n \n 19860219\n\n \n \n 3241102\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1033-org\n\n \n \nzachary\n\n \n \nmccarthy\n\n \n \n134\n\n \n \nteal street\n\n \n \ngreenwood\n\n \n \n6024\n\n \n \nwa\n\n \n \n19860219\n\n \n \n3241102\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n \n\n

\n\n\n"},"metadata":{}}],"execution_count":30,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9e4ad578-f75f-4011-8027-dc565933adc6"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"data.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":52,"statement_ids":[52],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:13:39.3741915Z","session_start_time":null,"execution_start_time":"2024-12-12T15:13:39.95129Z","execution_finish_time":"2024-12-12T15:13:40.2508845Z","parent_msg_id":"e6afa7a6-fd1b-454d-af86-38b6e6686506"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 52, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n\tData Documentation\n\t\n\n\n\t\n\n\t\n\t\t\t\n\t\t\t\n\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\n\t
Field NameField TypeNullable
\n\t\t\t\t\trec_id\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tfname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tlname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstNo\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd1\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd2\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tcity\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstate\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tdob\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tssn\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\n\n\n"},"metadata":{}}],"execution_count":31,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e58aad4c-1ee3-4977-b211-ebeb9d7539c9"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"version_major":2,"version_minor":0,"state":{"0112614dd803438a986c77cfda539dba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cd7680c5c7d54872b46d824dfd45b61f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_04911938acd2486e8fc0ded740020ea1","style":"IPY_MODEL_ad77a508719f4730a16cf01475525150"}},"6f94a4de6db941189e6a0deabf52e2ad":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7f48a6c51c9f458a80deed26ea3b9011","IPY_MODEL_9efc44bbb2af482989a69577c7b793d0","IPY_MODEL_abc4ad768b3d4f75b3f6f8e3d9d3350d"],"layout":"IPY_MODEL_e0d2670f67e34eee81694ce7b7c97cd7"}},"0c26c8827bf54b95a4cc7d119b485e81":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e5b99552291e4649acf8760161e02ad9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"6a13045354274a089c720f0a3f6fc7b7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_a78ca3ab571448c09c99720e6914c9a5","IPY_MODEL_fd4beb5f2be94c609aed0730b98b9fea","IPY_MODEL_2019411034194afc8bea365fa7205623"],"layout":"IPY_MODEL_41e5e2f1dabe421d90c77a0af367cc74"}},"1a16c51638774862acb327afd5a6f057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ae4bd3e8f34741e7b87423cdaf49a198":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"01b2b8f50eb348cf9ee75f3145179cee":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8b71f2fe25b0404faedd772588744c33":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"7f48a6c51c9f458a80deed26ea3b9011":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_9f7543b4d79248bc8ecf6e9ce6bf31cf","style":"IPY_MODEL_241d4546ce8b4f0684be34c8b75eb58f"}},"d3bb974dd1f0490bb77dffaf8540d439":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_47e1703b3d45461f816b4ec1f8ea445a","style":"IPY_MODEL_8b71f2fe25b0404faedd772588744c33"}},"2266b285bd664631a0a6c9e89a35ed51":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"3af6c6b8d18d48ca89cbc4f5299f6f72":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e9d8900ddcf64682bbf5198fbf46f39d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7468229546d94bfcab6525edb9757637","tooltips":[],"style":"IPY_MODEL_f1bad4094ead437cbc0eda8372c538a8","icons":[]}},"63e74252206d4c5db3c7a350096b0435":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4cbbd9bb43ea4bcb82861e22c1478cf3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c26c8827bf54b95a4cc7d119b485e81","style":"IPY_MODEL_db63ca43d6934485987860bb1f441f29"}},"67d9530cacbf4bbe8144836c57e61acb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:81734014375837:8
rec_idrec-1029-dup-1rec-1022-dup-1
fname sachin jackson
lname stephenson eglinton
stNo 81 840
add1 rose scott circuit fowles street
add2 cordoba manor moun tjiew
city 4226 2830
state vic sa
dob 19461101 19830807
ssn 4783085 2932837
","layout":"IPY_MODEL_7862a64b0ced43e8b70b7f5684987936","style":"IPY_MODEL_2d427fa36cec488e8239a8c453efc375"}},"1829f914d5274fcc89106d626e3295de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7a6c3a89abf64a438aa69a6d0e63782e","IPY_MODEL_8b544a3eb42548698fec50307ca58cf0","IPY_MODEL_7ab4a49ee5cc4cd2bdc3a7b0cd066e29"],"layout":"IPY_MODEL_9d57f12f444b47b58f6982290bc17ba2"}},"d973662f8e8d4d80add362dc786e8325":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"ad77a508719f4730a16cf01475525150":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"39cadceacdbc4966a574c52a98c6260d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 6 record pairs is a match or not

","layout":"IPY_MODEL_5694a3ce6d8d4ae4b3022ded67aa7fd6","style":"IPY_MODEL_d973662f8e8d4d80add362dc786e8325"}},"8e9304290aab4a1fa38a89411af22922":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2d427fa36cec488e8239a8c453efc375":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9909b484567e49d3a2b619fec9e125b9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9fe8115b161a4a309887a31b449f2989":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"970014aa3a6b4acb981c239e49b5c8a1":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eedf22cb2361430099f8f6169cb418ea":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_f5e420d27b5d4c92bc8380c01cfa2151","IPY_MODEL_40544637e23545a1a6fc511777301f2d","IPY_MODEL_fcd49a0c3a1342b1bb6473cf90c1b88b"],"layout":"IPY_MODEL_f1be32a9a51445f98e99e3b4a2c697bb"}},"6225593e71364eb181cff48c1cfcfcc2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a78b5089adc74cd896d1e477251a4ac6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5306ed2302184ab8ba22c30999cb5572":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d1ca7f2a677e4e2783d6
60faee4c4701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_1f1ae689a00642b597a76f6721a06432","style":"IPY_MODEL_fe6677ee651742e1abf26212230c71af"}},"721f29e0f7664888a2936a3ceddafb6d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"23f62e8b7e2e4be1ae544202d2c1d38d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3fc421549e7425b815de2a3d01602d1","style":"IPY_MODEL_7f44c72c66414102acab1c2578025735"}},"4402fa32ec2e4f12afbd61344d431bcc":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"78889cdf217643fa9f4d114f1918b2f6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"083dbadeee3f4683a499f9b612768701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c847d55d401e46bba108bca1bf8a7770","style":"IPY_MODEL_efade4d483f24f349d3d478be973b355"}},"1e2bcb99927b4a8cb5c7dd4eaac39225":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0371cfc91c0d421ab01ddd16b3972743":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_3bda20edce274aa7b1a92b98914530e1","IPY_MODEL_ccbf1dffd785415594fd880aa5cc8edf","IPY_MODEL_498839735d8f40018aca7aac0da8f5c9"],"layout":"IPY_MODEL_25e1281b496a4a958955a4d9091ca382"}},"01ee458406bc4bc7aae55eb99c0b504b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_af7596b42e5c4b9da6a85846c55f2092","IPY_MODEL_e3697e92e3e04c82b865bc3328dcad2b","IPY_MODEL_4c7afd0822eb4871b7708acbfb040fbf","IPY_MODEL_5d8d51ddc216416cb12979d0f38aae5a","IPY_MODEL_4ddf0fd6818343a58cee87bd452691eb","IPY_MODEL_a8bf95eb6af447ee89f946a9b6b4f1a9","IPY_MODEL_0371cfc91c0d421ab01ddd16b3972743","IPY_MODEL_804f5f862a2547cc833f3f27c18d69de","IPY_MODEL_b95905218e04479b8cba30790100004b","IPY_MODEL_55172f1685204f24a3b38debc635c6b9","IPY_MODEL_b47d111ecdf142a9bf96dea7cc00f12e","IPY_MODEL_0096a2bb367e4410ab96be94878df836","IPY_MODEL_9f688658e0a84aab86fb4b6e9b14eeb5","IPY_MODEL_6a13045354274a089c720f0a3f6fc7b7","IPY_MODEL_6f94a4de6db941189e6a0deabf52e2ad","IPY_MODEL_1829f914d5274fcc89106d626e3295de"],"layout":"IPY_MODEL_ddcfc3d0e90741c0a6c0b67b47f6f53d"}},"5423e9abb08d4175a8c593b60b35ad8d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"952a9f160893406791ec1975a5af971f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc724d1ceb584472a158a91de7b17cae":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:41734014375837:4
rec_idrec-1026-dup-0rec-1026-dup-0
fname xanixani
lname greengreen
stNo 22
add1 phill ip avenuephill ip avenue
add2 abbey greenabbey green
city 51085108
state nswnsw
dob 1939041019390410
ssn 92010579201057
","layout":"IPY_MODEL_f596ee340faa4691abdef6d010ff513c","style":"IPY_MODEL_9e7440ae7f6844f3a8c084a8379df095"}},"f75d9074d0674656b77cb99efcbfe37d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"498839735d8f40018aca7aac0da8f5c9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3b9f4a35a1741cdab1b8127376790be","style":"IPY_MODEL_7ec772d0ae8d4365bd39d4a4b8050837"}},"942ce2043b974942801386f7fe813e59":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d7c93338fb5744a98060d36f29894737":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_29bb51c1b4b842d7992d0c6be6e582c8","style":"IPY_MODEL_5250e70ff02e4d219de6502a27b84357"}},"e23cfe9a93804558acc75418021aa409":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:01734014375837:0
rec_idrec-1022-dup-1rec-1029-dup-1
fname jacksonsachin
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 moun tjiewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_a36bb933f92c4ada82504e4c10570057","style":"IPY_MODEL_cbbfcbe143644072846912c9d8f1c6d7"}},"854564d76efa4e17b66c5e86ac9b8783":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_62d1842b557f49399311b9b573dac9d5","style":"IPY_MODEL_abea2c5d5ee14775a1e9c5a025bb83f2"}},"7ad966747291400d9013a2a2e2b26e10":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4c48892283394169b0911d6922a97058":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"56a4135e67644d0a83f0612cfe92fea8":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:161734014375837:16
rec_idrec-1034-orgrec-1034-org
fname jasminejasmine
lname changchang
stNo 210210
add1 magnolia drivemagnolia drive
add2 sunset valleysunset valley
city 30213021
state vicvic
dob 1993020319930203
ssn 45623814562381
","layout":"IPY_MODEL_4ebfc8728d2c4186a14ab0d9e52ca0c5","style":"IPY_MODEL_970014aa3a6b4acb981c239e49b5c8a1"}},"714d113c8c894968a03f8521e9c6bdf7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2019411034194afc8bea365fa7205623":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_08b9883f77f148c0be1916fbe711a94f","style":"IPY_MODEL_a6c854c673a54b54aa8f5894539a717c"}},"6020cfd838a84c38b42baee5e2ab5239":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c3b9f4a35a1741cdab1b8127376790be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f596ee340faa4691abdef6d010ff513c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6cc91e9e20d343679c6c32830b960faa":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"db916c8e786c40abb3db1432a9688e1d":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d7c93338fb5744a98060d36f29894737","IPY_MODEL_279fb85975df426a821e8f7e46c90f25","IPY_MODEL_786c8eb15f0c4f58b458338018aa8e49"],"layout":"IPY_MODEL_ecbd13d9937c463ba6b654348c05dde3"}},"0a1166c59f694b399f6c9bcbb1e6c89a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_6225593e71364eb181cff48c1cfcfcc2","style":"IPY_MODEL_e5b99552291e4649acf8760161e02ad9"}},"454c2074dba54875b5ee91c45e229169":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_270b1bb9c8d740fbb2efecaf2e1f9f9d","style":"IPY_MODEL_8bc2bd72d40d4224a5fff0f2bccdcbd3"}},"18acd101aa8647c39f5a7c247cedf365":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_4c48892283394169b0911d6922a97058","style":"IPY_MODEL_4fdc3a5116b54cb88adc45c257305421"}},"02ccf836a76444bd99fd508ed827e13a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:21734014375837:2
rec_idrec-1021-orgrec-1021-org
fname thomasthomas
lname georgegeorge
stNo 11
add1 mcmanus placemcmanus place
add2 north turramurranorth turramurra
city 31303130
state sasa
dob 1963022519630225
ssn 54605345460534
","layout":"IPY_MODEL_5423e9abb08d4175a8c593b60b35ad8d","style":"IPY_MODEL_d54363eed626420f910bfcfa01b2e420"}},"cc8a117379724417a5481bb9d17126b5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8684f0945a9048019a3165273fa674e6":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"feeb7fe2ee5a40e196cd16cfb2ae7635":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"fcd49a0c3a1342b1bb6473cf90c1b88b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_63e74252206d4c5db3c7a350096b0435","style":"IPY_MODEL_73bdd9f2969640ddba2a56ae39ceb6b7"}},"6722bf94601449c0a162116c1770e74b":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e7b43d6a420f46458c199aab46c9eb43":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7b6b2d02996344f3a8b829ce2ba14026","tooltips":[],"style":"IPY_MODEL_2a82f125b47641b983a65520897e61a9","icons":[]}},"261d645c4aa24c10ad9c02e75ee2d0b0":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"41e5e2f1dabe421d90c77a0af367cc74":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2a571eec79e4117b5c8dcc04d42ea8c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"efade4d483f24f349d3d478be973b355":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4ddf0fd6818343a58cee87bd452691eb":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_56a4135e67644d0a83f0612cfe92fea8","IPY_MODEL_e9d8900ddcf64682bbf5198fbf46f39d","IPY_MODEL_a16fae766e5c4828ac184a17e8da44f9"],"layout":"IPY_MODEL_721f29e0f7664888a2936a3ceddafb6d"}},"a8bf95eb6af447ee89f946a9b6b4f1a9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_02ccf836a76444bd99fd508
ed827e13a","IPY_MODEL_9bc94600605c4977ae1694a17888bd17","IPY_MODEL_d1ca7f2a677e4e2783d660faee4c4701"],"layout":"IPY_MODEL_937178220af4423daa2cd35aa8c3263a"}},"937178220af4423daa2cd35aa8c3263a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e3697e92e3e04c82b865bc3328dcad2b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e23cfe9a93804558acc75418021aa409","IPY_MODEL_482b6fc0521849dba90e938d82e68ed5","IPY_MODEL_854564d76efa4e17b66c5e86ac9b8783"],"layout":"IPY_MODEL_beea94f4506a4e83830588c4d4fcb1c7"}},"1320b18208d0404a8af38e1393051351":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2dc9896b314544f3bd71c32c625e1175":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"435029d048944a1d8bfd7f3af18ffeba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"026ce8c3d7e24f86adada904417924cf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_a78b5089adc74cd896d1e477251a4ac6","tooltips":[],"style":"IPY_MODEL_e2385f8daa6b4e8faecbc68192b40d14","icons":[]}},"0a3dc99ab26f42bf90522b4eabb0ad21":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_727805949ef54a7da481fe155bc77b47","tooltips":[],"style":"IPY_MODEL_7a93d4ae0e91471ab30ca90034d9f90c","icons":[]}},"7ec772d0ae8d4365bd39d4a4b8050837":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"786c8eb15f0c4f58b458338018aa8e49":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_32c982d5fd3545ff8e0bc9cbbe3dc90f","style":"IPY_MODEL_0203adb880ca48e1a6ead1b5af804670"}},"abea2c5d5ee14775a1e9c5a025bb83f2":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2f67e4e809494262b3752db712d75ce7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_18acd101aa8647c39f5a7c247cedf365","IPY_MODEL_4093238088364a1b934d6722c9468de8","IPY_MODEL_7d62968db1ae4f4c8d5e27028e99c6d3"],"layout":"IPY_MODEL_fb146a7c62e44aab94d15666c4afb50a"}},"a16fae766e5c4828ac184a17e8da44f9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_9b29c240e7114680978ecef578ce5fd9","style":"IPY_MODEL_fe94e56c365f4bd8afcf9a57eced058e"}},"e1567066674b498ca58437b558f4ee8e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:71734014375837:7
rec_idrec-1033-orgrec-1033-org
fname zacharyzachary
lname mccarthymccarthy
stNo 134134
add1 teal streetteal street
add2 greenwoodgreenwood
city 60246024
state wawa
dob 1986021919860219
ssn 32411023241102
","layout":"IPY_MODEL_c24d9d54deb84bbab0da6405aea82569","style":"IPY_MODEL_6722bf94601449c0a162116c1770e74b"}},"4c7afd0822eb4871b7708acbfb040fbf":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_64f8752992414e9aa3b677911f0d4848","IPY_MODEL_dacefcb9fc10425e80c5233cb0ba4ffd","IPY_MODEL_2757b91608934f0daa7d9f2397a65d8d"],"layout":"IPY_MODEL_514b19922da24f17bb39aa72d78beaf4"}},"9efc44bbb2af482989a69577c7b793d0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_0112614dd803438a986c77cfda539dba","tooltips":[],"style":"IPY_MODEL_825e88947fcc454498b4739c0757c97d","icons":[]}},"afac862e71a043c381874456054c5e41":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9bc94600605c4977ae1694a17888bd17":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_ae4bd3e8f34741e7b87423cdaf49a198","tooltips":[],"style":"IPY_MODEL_4be40990a33d4872871d58e52d09d898","icons":[]}},"e2385f8daa6b4e8faecbc68192b40d14":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"b47d111ecdf142a9bf96dea7cc00f12e":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_10fadcb3c1214044b997e0d2668bd9d3","IPY_MODEL_75ca0d3400af41f0a754c346a121c9b6","IPY_MODEL_91b4da3856884938987c6d2cf5751f9f"],"layout":"IPY_MODEL_8a0d5bc35d6746959993d76e767f4bc8"}},"b72e35612aa7407890a329608f3f0d49":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d2809335c95b4235b0ca86feab6b14d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_f3c9cd7b31a84fb4bd262c69b122e11d","tooltips":[],"style":"IPY_MODEL_8e9304290aab4a1fa38a89411af22922","icons":[]}},"44acc8fae0314cb7a33463d2bc6353e7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3a2907ac772b46ed81c079f41434c74b":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d0d57063e8b144b49970df32c53ce162":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_882d27a063a94986bc304b02c5222b7a","tooltips":[],"style":"IPY_MODEL_0d2c43c11f554f02b9b0e521a02df66f","icons":[]}},"085d7c0804ab4af6bb42b2928a6c2bd5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"98d458cfcd874e2c8af3998379e6c432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3bda20edce274aa7b1a92b98914530e1":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:31734014375837:3
rec_idrec-1022-dup-0rec-1022-dup-0
fname jacksonjackson
lname eglintoneglinton
stNo 840840
add1 fowles streetfowles street
add2 mountviewmountview
city 28032803
state sasa
dob 1983080719830807
ssn 29328372932837
","layout":"IPY_MODEL_181192c2388e4db190a751c4042e238a","style":"IPY_MODEL_eb072c0a62a24f03b150bc624aad5a5d"}},"fe6677ee651742e1abf26212230c71af":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"29bb51c1b4b842d7992d0c6be6e582c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7b6b2d02996344f3a8b829ce2ba14026":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"19ffca6433c14da198770adae02221be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17243a3f0b654e11970f9b5bce82f79c":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d3f5a5077c9b441e832429ae5a364fbc","IPY_MODEL_7661a6f07c404d3392d0834ebb51f2d5","IPY_MODEL_4cbbd9bb43ea4bcb82861e22c1478cf3"],"layout":"IPY_MODEL_1a16c51638774862acb327afd5a6f057"}},"b2130bed69ca4703acb121ebccd506ca":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2a82f125b47641b983a65520897e61a9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9b29c240e7114680978ecef578ce5fd9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5694a3ce6d8d4ae4b3022ded67aa7fd6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d3f5a5077c9b441e832429ae5a364fbc":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_085d7c0804ab4af6bb42b2928a6c2bd5","style":"IPY_MODEL_754c27d772534ecaaedab5591427ca09"}},"db63ca43d6934485987860bb1f441f29":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9f7543b4d79248bc8ecf6e9ce6bf31cf":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"1f1ae689a00642b597a76f6721a06432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0203adb880ca48e1a6ead1b5af804670":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4fdc3a5116b54cb88adc45c257305421":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5e173e9779fd4ca08143464fd42bdf62":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"214f3e7e895d4f54bbaa829b69ca8671":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"081d75be0414491faaccaec2648ddcd9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"514b19922da24f17bb39aa72d78beaf4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"595a260ac98d49e6894496961fa7701c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5250e70ff02e4d219de6502a27b84357":{"model_name":"HTMLStyleMode
l","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"fe94e56c365f4bd8afcf9a57eced058e":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"10fadcb3c1214044b997e0d2668bd9d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_805ed2cf73364f13addeaf13a8073620","style":"IPY_MODEL_115453304b8e477a96726060b0c509ad"}},"da34c9ff8e3b4738a59ec9eb0a39d2cb":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"aed62bd42df24b5788b0fa4f6e8fb610":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7f44c72c66414102acab1c2578025735":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"69c523dee7d54c3b8f0620ad2eb6dc51":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_0c96ba84dad84dbfb3b8347e9e7ae748","style":"IPY_MODEL_6020cfd838a84c38b42baee5e2ab5239"}},"25e1281b496a4a958955a4d9091ca382":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"727805949ef54a7da481fe155bc77b47":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b0d572405b3344278a443aa21138d927":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17f6fddf67e242588f39e2aaf0558678":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"91b4da3856884938987c6d2cf5751f9f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_318d9d146d1f41ee9a169043637dadb7","style":"IPY_MODEL_dad9c9e2d53744f4a2284917a78fd931"}},"7a93d4ae0e91471ab30ca90034d9f90c":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"0d2c43c11f554f02b9b0e521a02df66f":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"279fb85975df426a821e8f7e46c90f25":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_9e8426a14afa4c95bf89465efe99089f","tooltips":[],"style":"IPY_MODEL_47acc27c5bb047009eecaa7aa4974cac","icons":[]}},"f3c9cd7b31a84fb4bd262c69b122e11d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a6c854c673a54b54aa8f5894539a717c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c86d53a9d8394704aaa74e27d7569cc0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6542b2868c0c43359d500c3828ef12ef":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:121734014375837:12
rec_idrec-1029-dup-1rec-1029-dup-1
fname sachinsachin
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_3af6c6b8d18d48ca89cbc4f5299f6f72","style":"IPY_MODEL_e2a571eec79e4117b5c8dcc04d42ea8c"}},"dad9c9e2d53744f4a2284917a78fd931":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"708a2ae873f8426fade245382a8c9208":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_cd7680c5c7d54872b46d824dfd45b61f","IPY_MODEL_012518d9797f4087a352a23bf5ba2aaf","IPY_MODEL_4150bb26c66d4de4954e13af8d0cd781"],"layout":"IPY_MODEL_aed62bd42df24b5788b0fa4f6e8fb610"}},"ccbf1dffd785415594fd880aa5cc8edf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_01b2b8f50eb348cf9ee75f3145179cee","tooltips":[],"style":"IPY_MODEL_5e173e9779fd4ca08143464fd42bdf62","icons":[]}},"788b34a5563a423798cb54ff8d7b996c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"804f5f862a2547cc833f3f27c18d69de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_fc724d1ceb584472a158a91de7b17cae","IPY_MODEL_d2809335c95b4235b0ca86feab6b14d1","IPY_MODEL_23f62e8b7e2e4be1ae544202d2c1d38d"],"layout":"IPY_MODEL_714d113c8c894968a03f8521e9c6bdf7"}},"4be40990a33d4872871d58e52d09d898":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"4093238088364a1b934d6722c9468de8":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","N
o Match"],"button_style":"info","layout":"IPY_MODEL_17a7abd324054f039724fb423e2a67a4","tooltips":[],"style":"IPY_MODEL_afac862e71a043c381874456054c5e41","icons":[]}},"fb146a7c62e44aab94d15666c4afb50a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0c96ba84dad84dbfb3b8347e9e7ae748":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3308de4749240c6bcd404cb4caf7ee4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22483139248d470ca2edbb0b22a669d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_c86d53a9d8394704aaa74e27d7569cc0","tooltips":[],"style":"IPY_MODEL_77d77f14d7254453909994ace6b43eb5","icons":[]}},"270b1bb9c8d740fbb2efecaf2e1f9f9d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7af3659f738046f0a562d772fba7aadd":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"64f8752992414e9aa3b677911f0d4848":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:11734014375837:1
rec_idrec-1032-dup-0rec-1032-dup-0
fname brooklynbrooklyn
lname naar-caftenasnaar-caftenas
stNo 210210
add1 duffy streetduffy street
add2 tourist parktourist park
city 24812481
state nswnsw
dob 1984080219840802
ssn 36243043624304
","layout":"IPY_MODEL_6cc91e9e20d343679c6c32830b960faa","style":"IPY_MODEL_b345a2da49d84b559a59792c488d0c1f"}},"9e7440ae7f6844f3a8c084a8379df095":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"62d1842b557f49399311b9b573dac9d5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"47acc27c5bb047009eecaa7aa4974cac":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8bc2bd72d40d4224a5fff0f2bccdcbd3":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4abfebecf35e47b8bdab070a428d4a77":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"451cd21ac7b64517b93824dd5ab79460":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2757b91608934f0daa7d9f2397a65d8d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_2292728174764b0bb766d983d2d8f272","style":"IPY_MODEL_2266b285bd664631a0a6c9e89a35ed51"}},"b95905218e04479b8cba30790100004b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e1567066674b498ca58437b558f4ee8e","IPY_MODEL_8d8dc1ef9db8403dbe741141f95578e6","IPY_MODEL_083dbadeee3f4683a499f9b612768701"],"layout":"IPY_MODEL_435029d048944a1d8bfd7f3af18ffeba"}},"754c27d772534ecaaedab5591427ca09":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"26877fd9c74e49a999f8134e2d8a41d2":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_0a1166c59f694b399f6c9bcbb1e6c89a","IPY_MODEL_d0d57063e8b144b49970df32c53ce162","IPY_MODEL_b3ce0440576c4d22a90b74ecfddf9afb"],"layout":"IPY_MODEL_139af57eb88742fdaf311e40157b4c1b"}},"a78ca3ab571448c09c99720e6914c9a5":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_f6f566807665447d8947ef4f1c1cb802","style":"IPY_MODEL_081d75be0414491faaccaec2648ddcd9"}},"482b6fc0521849dba90e938d82e68ed5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b72e35612aa7407890a329608f3f0d49","tooltips":[],"style":"IPY_MODEL_f75d9074d0674656b77cb99efcbfe37d","icons":[]}},"2a7ce010e31c474d834773f51158ad6c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"32c982d5fd3545ff8e0bc9cbbe3dc90f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"dacefcb9fc10425e80c5233cb0ba4ffd":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_22aaffab00674834860abe4b7df78f36","tooltips":[],"style":"IPY_MODEL_3a2907ac772b46ed81c079f41434c74b","icons":[]}},"f5e420d27b5d4c92bc8380c01cfa2151":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_b2130bed69ca4703acb121ebccd506ca","style":"IPY_MODEL_942ce2043b974942801386f7fe813e59"}},"77d77f14d7254453909994ace6b43eb5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"f6f566807665447d8947ef4f1c1cb802":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ecbd13d9937c463ba6b654348c05dde3":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8a0d5bc35d6746959993d76e767f4bc8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"805ed2cf73364f13addeaf13a8073620":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22aaffab00674834860abe4b7df78f36":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc7bff94e2684f51b8ff148cdf04d0ff":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_39cadceacdbc4966a574c52a98c6260d","IPY_MODEL_17243a3f0b654e11970f9b5bce82f79c","IPY_MODEL_26877fd9c74e49a999f8134e2d8a41d2","IPY_MODEL_eedf22cb2361430099f8f6169cb418ea","IPY_MODEL_708a2ae873f8426fade245382a8c9208","IPY_MODEL_2f67e4e809494262b3752db712d75ce7","IPY_MODEL_db916c8e786c40abb3db1432a9688e1d"],"layout":"IPY_MODEL_214f3e7e895d4f54bbaa829b69ca8671"}},"9f688658e0a84aab86fb4b6e9b14eeb5":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_69c523dee7d54c3b8f0620ad2eb6dc51","IPY_MODEL_026ce8c3d7e24f86adada904417924cf","IPY_MODEL_5227aa6fa7c749238d811d462cb0fe36"],"layout":"IPY_MODEL_bd88f0c19aff4c1cb0bd3a5c52db200b"}},"d7ab081b539e42649eef86e6f7b6c76d":{"model_name":"LayoutModel","model_module":"@jupyter
-widgets/base","model_module_version":"2.0.0","state":{}},"b59772ab1d914a24bcb3a77947962f2c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"08b9883f77f148c0be1916fbe711a94f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7468229546d94bfcab6525edb9757637":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17a7abd324054f039724fb423e2a67a4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fbf9d80d166744d88c66208824d17c24":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c49cc29fbd04b46b38f410912a180d9","style":"IPY_MODEL_b27b76432a684b6980b5052cadfea618"}},"e0d2670f67e34eee81694ce7b7c97cd7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d54363eed626420f910bfcfa01b2e420":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"241d4546ce8b4f0684be34c8b75eb58f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2292728174764b0bb766d983d2d8f272":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"af7596b42e5c4b9da6a85846c55f2092":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 15 record pairs is a match or not

","layout":"IPY_MODEL_4abfebecf35e47b8bdab070a428d4a77","style":"IPY_MODEL_4402fa32ec2e4f12afbd61344d431bcc"}},"8d8dc1ef9db8403dbe741141f95578e6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b59772ab1d914a24bcb3a77947962f2c","tooltips":[],"style":"IPY_MODEL_8684f0945a9048019a3165273fa674e6","icons":[]}},"7d62968db1ae4f4c8d5e27028e99c6d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_78889cdf217643fa9f4d114f1918b2f6","style":"IPY_MODEL_261d645c4aa24c10ad9c02e75ee2d0b0"}},"c3fc421549e7425b815de2a3d01602d1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"825e88947fcc454498b4739c0757c97d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"882d27a063a94986bc304b02c5222b7a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ddcfc3d0e90741c0a6c0b67b47f6f53d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"55172f1685204f24a3b38debc635c6b9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_67d9530cacbf4bbe8144836c57e61acb","IPY_MODEL_e7b43d6a420f46458c199aab46c9eb43","IPY_MODEL_fbf9d80d166744d88c66208824d17c24"],"layout":"IPY_MODEL_19ffca6433c14da198770adae02221be"}},"73bdd9f2969640ddba2a56ae39ceb6b7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"40544637e23545a1a6fc511777301f2d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_7d018bb285e1499692cbb241516046f2","tooltips":[],"style":"IPY_MODEL_e2d942ea35174426aa46171c6348c308","icons":[]}},"c847d55d401e46bba108bca1bf8a7770":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1bad4094ead437cbc0eda8372c538a8":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"115453304b8e477a96726060b0c509ad":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"b27b76432a684b6980b5052cadfea618":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eb072c0a62a24f03b150bc624aad5a5d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"bd88f0c19aff4c1cb0bd3a5c52db200b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"139af57eb88742fdaf311e40157b4c1b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9e8426a14afa4c95bf89465efe99089f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"653d6750617f4c788c17ae743b0da13b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"abc4ad768b3d4f75b3f6f8e3d9d3350d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_98d458cfcd874e2c8af3998379e6c432","style":"IPY_MODEL_a7171853339643a48382ec125a26944d"}},"0096a2bb367e4410ab96be94878df836":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_454c2074dba54875b5ee91c45e229169","IPY_MODEL_0a3dc99ab26f42bf90522b4eabb0ad21","IPY_MODEL_d3bb974dd1f0490bb77dffaf8540d439"],"layout":"IPY_MODEL_7ad966747291400d9013a2a2e2b26e10"}},"0c49cc29fbd04b46b38f410912a180d9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4150bb26c66d4de4954e13af8d0cd781":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b3308de4749240c6bcd404cb4caf7ee4","style":"IPY_MODEL_595a260ac98d49e6894496961fa7701c"}},"181192c2388e4db190a751c4042e238a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ab4a49ee5cc4cd2bdc3a7b0cd066e29":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_7ef6892a4e7444458465dd5a5e76fae5","style":"IPY_MODEL_788b34a5563a423798cb54ff8d7b996c"}},"beea94f4506a4e83830588c4d4fcb1c7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a36bb933f92c4ada82504e4c10570057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2d942ea35174426aa46171c6348c308":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"7862a64b0ced43e8b70b7f5684987936":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3ce0440576c4d22a90b74ecfddf9afb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b0d572405b3344278a443aa21138d927","style":"IPY_MODEL_9fe8115b161a4a309887a31b449f2989"}},"7661a6f07c404d3392d0834ebb51f2d5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7af3659f738046f0a562d772fba7aadd","tooltips":[],"style":"IPY_MODEL_5306ed2302184ab8ba22c30999cb5572","icons":[]}},"fd4beb5f2be94c609aed0730b98b9fea":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_d7ab081b539e42649eef86e6f7b6c76d","tooltips":[],"style":"IPY_MODEL_9909b484567e49d3a2b619fec9e125b9","icons":[]}},"c24d9d54deb84bbab0da6405aea82569":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7d018bb285e1499692cbb241516046f2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"012518d9797f4087a352a23bf5ba2aaf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_653d6750617f4c788c17ae743b0da13b","tooltips":[],"style":"IPY_MODEL_1320b18208d0404a8af38e1393051351","icons":[]}},"75ca0d3400af41f0a754c346a121c9b6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_1e2bcb99927b4a8cb5c7dd4eaac39225","tooltips":[],"style":"IPY_MODEL_feeb7fe2ee5a40e196cd16cfb2ae7635","icons":[]}},"7a6c3a89abf64a438aa69a6d0e63782e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_2dc9896b314544f3bd71c32c625e1175","style":"IPY_MODEL_2a7ce010e31c474d834773f51158ad6c"}},"8b544a3eb42548698fec50307ca58cf0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_6ff19e3e507c4bebafd8a1bff6ce55c8","tooltips":[],"style":"IPY_MODEL_cc8a117379724417a5481bb9d17126b5","icons":[]}},"318d9d146d1f41ee9a169043637dadb7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cbbfcbe143644072846912c9d8f1c6d7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5227aa6fa7c749238d811d462cb0fe36":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_44acc8fae0314cb7a33463d2bc6353e7","style":"IPY_MODEL_451cd21ac7b64517b93824dd5ab79460"}},"c80f86a431824631b6626eba7c46fc33":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_17f6fddf67e242588f39e2aaf0558678","style":"IPY_MODEL_da34c9ff8e3b4738a59ec9eb0a39d2cb"}},"47e1703b3d45461f816b4ec1f8ea445a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9d57f12f444b47b58f6982290bc17ba2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b345a2da49d84b559a59792c488d0c1f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"04911938acd2486e8fc0ded740020ea1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ef6892a4e7444458465dd5a5e76fae5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5d8d51ddc216416cb12979d0f38aae5a":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_6542b2868c0c43359d500c3828ef12ef","IPY_MODEL_22483139248d470ca2edbb0b22a669d1","IPY_MODEL_c80f86a431824631b6626eba7c46fc33"],"layout":"IPY_MODEL_952a9f160893406791ec1975a5af971f"}},"4ebfc8728d2c4186a14ab0d9e52ca0c5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6ff19e3e507c4bebafd8a1bff6ce55c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1be32a9a51445f98e99e3b4a2c697bb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a7171853339643a48382ec125a26944d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}}}}},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.ti
meout":"2400000"}}},"dependencies":{"lakehouse":{"default_lakehouse":"36ef8bc2-c67a-4512-b060-e25489729c71","default_lakehouse_name":"data","default_lakehouse_workspace_id":"e803987a-98b6-445f-815c-3d15c2c46877","known_lakehouses":[{"id":"7e68da48-69ac-4253-b7bf-1f24863ab25a"},{"id":"1ca5fe82-c7a1-494d-825d-9168c65112d1"},{"id":"36ef8bc2-c67a-4512-b060-e25489729c71"}]},"environment":{"environmentId":"1ae2ef87-3a76-4cd3-90b5-e829f7a4ca9c","workspaceId":"e803987a-98b6-445f-815c-3d15c2c46877"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file From f2a2625afe6ae59a3df7c737cdc9b60365c05312 Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 10:04:17 +0530 Subject: [PATCH 3/8] Delete examples/Fabric/Sample --- examples/Fabric/Sample | 1 - 1 file changed, 1 deletion(-) delete mode 100644 examples/Fabric/Sample diff --git a/examples/Fabric/Sample b/examples/Fabric/Sample deleted file mode 100644 index 5692994f..00000000 --- a/examples/Fabric/Sample +++ /dev/null @@ -1 +0,0 @@ -print("Fabric Notebook") From 2c923d2e08d942e6103ddcbbdf168adfc2b1835a Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 13:01:19 +0530 Subject: [PATCH 4/8] fabric --- examples/fabric | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/fabric diff --git a/examples/fabric b/examples/fabric new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/fabric @@ -0,0 +1 @@ + From a357bf5ce1a40e9e42b13ad68570d8247231d2c1 Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 13:07:10 +0530 Subject: [PATCH 5/8] Delete examples/fabric --- examples/fabric | 1 - 1 file changed, 1 deletion(-) delete mode 100644 examples/fabric diff --git a/examples/fabric b/examples/fabric deleted file mode 100644 index 8b137891..00000000 --- a/examples/fabric +++ /dev/null @@ -1 +0,0 @@ - From 88d7a688b0f7924898da1440f18de830a827af9f Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 13:07:53 +0530 Subject: [PATCH 6/8] Create 
fabric --- examples/fabric/fabric | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/fabric/fabric diff --git a/examples/fabric/fabric b/examples/fabric/fabric new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/fabric/fabric @@ -0,0 +1 @@ + From 6d35db4593800f9a43c1409aa6216214e99a38e0 Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 13:08:31 +0530 Subject: [PATCH 7/8] Delete examples/Fabric directory --- examples/Fabric/Zingg_Notebook.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 examples/Fabric/Zingg_Notebook.ipynb diff --git a/examples/Fabric/Zingg_Notebook.ipynb b/examples/Fabric/Zingg_Notebook.ipynb deleted file mode 100644 index e0007e1a..00000000 --- a/examples/Fabric/Zingg_Notebook.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"cells":[{"cell_type":"code","source":["#abfss://Test@onelake.dfs.fabric.microsoft.com/ZinggData.Lakehouse/Files/data.csv\n","spark.sparkContext.setCheckpointDir(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":6,"statement_ids":[6],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.7727126Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:45.3551064Z","execution_finish_time":"2024-12-12T14:38:46.1554742Z","parent_msg_id":"0568e5f6-3102-476c-9119-1eea357e5f90"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 6, Finished, Available, Finished)"},"metadata":{}}],"execution_count":2,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"320825db-e1b4-4106-8f77-d974f59e6fe1"},{"cell_type":"code","source":["pip install 
zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":7,"statement_ids":[7],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.8919804Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:46.9779028Z","execution_finish_time":"2024-12-12T14:38:59.3086347Z","parent_msg_id":"9a6de53a-f5ed-4655-9341-4c4a7802ffe5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 7, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting zingg\n Downloading zingg-0.4.0-py2.py3-none-any.whl.metadata (933 bytes)\nCollecting py4j==0.10.9 (from zingg)\n Downloading py4j-0.10.9-py2.py3-none-any.whl.metadata (1.3 kB)\nDownloading zingg-0.4.0-py2.py3-none-any.whl (74.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.7/74.7 MB\u001b[0m \u001b[31m43.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.6/198.6 kB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: py4j, zingg\n Attempting uninstall: py4j\n Found existing installation: py4j 0.10.9.7\n Uninstalling py4j-0.10.9.7:\n Successfully uninstalled py4j-0.10.9.7\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\npyspark 3.5.1.5.4.20240407 requires py4j==0.10.9.7, but you have py4j 0.10.9 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed py4j-0.10.9 zingg-0.4.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":3,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d45194dd-f9fa-4522-9b8d-f68390a36cb0"},{"cell_type":"code","source":["spark.sparkContext.getCheckpointDir()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":8,"statement_ids":[8],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.0470709Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:59.8920089Z","execution_finish_time":"2024-12-12T14:39:00.1425377Z","parent_msg_id":"a7a3e48d-4f55-4dcc-94db-21864a32cdab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 8, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"'abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/b2adeefa-d873-4af7-9780-3af8598f5959'"},"metadata":{}}],"execution_count":4,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"735117dc-0f56-491b-a805-a16db331c90d"},{"cell_type":"code","source":["pip show 
zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":9,"statement_ids":[9],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.2324828Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:00.6902784Z","execution_finish_time":"2024-12-12T14:39:04.2406337Z","parent_msg_id":"a041b135-c20d-4db9-9e2b-b8b4718c42dc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 9, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: zingg\r\nVersion: 0.4.0\r\nSummary: Zingg Entity Resolution, Data Mastering and Deduplication\r\nHome-page: https://github.com/zinggAI/zingg\r\nAuthor: Zingg.AI\r\nAuthor-email: sonalgoyal4@gmail.com\r\nLicense: https://github.com/zinggAI/zingg/blob/main/LICENSE\r\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\r\nRequires: py4j\r\nRequired-by: \r\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":5,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"51e5d94a-b1d6-47be-bbf1-98208af1b5d8"},{"cell_type":"code","source":["pip install 
tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":10,"statement_ids":[10],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.3970144Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:04.8223306Z","execution_finish_time":"2024-12-12T14:39:09.8213294Z","parent_msg_id":"c2bb18f4-faa5-4fc2-b94e-0ccd1e2b6af7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 10, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting tabulate\n Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\nDownloading tabulate-0.9.0-py3-none-any.whl (35 kB)\nInstalling collected packages: tabulate\nSuccessfully installed tabulate-0.9.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":6,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a2e77ae6-eeb2-482f-a47e-8c6ed0e7bb59"},{"cell_type":"code","source":["pip show tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":11,"statement_ids":[11],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.5376703Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:10.4269168Z","execution_finish_time":"2024-12-12T14:39:14.5511724Z","parent_msg_id":"0a38f00a-6e32-4871-aec1-99613a3180bd"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 11, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: 
tabulate\nVersion: 0.9.0\nSummary: Pretty-print tabular data\nHome-page: \nAuthor: \nAuthor-email: Sergey Astanin \nLicense: MIT\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\nRequires: \nRequired-by: \nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":7,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ed5c6ed3-40ef-4447-ab75-4a6a898814fe"},{"cell_type":"code","source":["##you can change these to the locations of your choice\n","##these are the only two settings that need to change\n","zinggDir = \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/models\"\n","modelId = \"testModelFebrl\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":12,"statement_ids":[12],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.6769995Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.1044655Z","execution_finish_time":"2024-12-12T14:39:15.354016Z","parent_msg_id":"7344a1f2-936d-4266-9e4f-bd76fd51601b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 12, Finished, Available, Finished)"},"metadata":{}}],"execution_count":8,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"c3b77184-4165-495e-b212-521dadef7125"},{"cell_type":"code","source":["## Define constants\n","MARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/marked/\"\n","UNMARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/unmarked/\"\n","\n","# Fill these with your specific details\n","storage_account = 
\"a1a73dc0-3894-4737-b38c-aa7fea437330\" # Replace with your storage account ID\n","fabric_url = \"dfs.fabric.microsoft.com\"\n","\n","# Updated paths for Microsoft Fabric\n","MARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{MARKED_DIR}\"\n","UNMARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{UNMARKED_DIR}\"\n","\n","## Import necessary libraries\n","import pandas as pd\n","import numpy as np\n","import os\n","import time\n","import uuid\n","from tabulate import tabulate\n","from ipywidgets import widgets, interact, GridspecLayout\n","import base64\n","import pyspark.sql.functions as fn\n","\n","# Import Azure libraries for Fabric\n","from azure.identity import DefaultAzureCredential\n","from azure.storage.filedatalake import DataLakeServiceClient\n","\n","# Zingg libraries\n","from zingg.client import *\n","from zingg.pipes import *\n","\n","# Setup Fabric authentication\n","def get_service_client():\n"," credential = DefaultAzureCredential()\n"," service_client = DataLakeServiceClient(\n"," account_url=f\"https://{storage_account}.dfs.fabric.microsoft.com\",\n"," credential=credential,\n"," )\n"," return service_client\n","\n","service_client = get_service_client()\n","\n","# Function to clean model directories in Fabric\n","def cleanModel():\n"," try:\n"," # Access the file system\n"," file_system_client = service_client.get_file_system_client(file_system=storage_account)\n"," \n"," # Remove marked directory\n"," if file_system_client.get_directory_client(MARKED_DIR).exists():\n"," file_system_client.get_directory_client(MARKED_DIR).delete_directory()\n"," \n"," # Remove unmarked directory\n"," if file_system_client.get_directory_client(UNMARKED_DIR).exists():\n"," file_system_client.get_directory_client(UNMARKED_DIR).delete_directory()\n"," \n"," print(\"Model cleaned successfully.\")\n"," except Exception as e:\n"," print(f\"Error cleaning model: {str(e)}\")\n"," return\n","\n","# Function to assign label to a candidate pair\n","def 
assign_label(candidate_pairs_pd, z_cluster, label):\n"," '''\n"," The purpose of this function is to assign a label to a candidate pair\n"," identified by its z_cluster value. Valid labels include:\n"," 0 - not matched\n"," 1 - matched\n"," 2 - uncertain\n"," '''\n"," # Assign label\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster'] == z_cluster, 'z_isMatch'] = label\n"," return\n","\n","# Function to count labeled pairs\n","def count_labeled_pairs(marked_pd):\n"," '''\n"," The purpose of this function is to count the labeled pairs in the marked folder.\n"," '''\n"," n_total = len(np.unique(marked_pd['z_cluster']))\n"," n_positive = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 1]['z_cluster']))\n"," n_negative = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 0]['z_cluster']))\n","\n"," return n_positive, n_negative, n_total\n","\n","# Setup interactive widget\n","available_labels = {\n"," 'No Match': 0,\n"," 'Match': 1,\n"," 'Uncertain': 2\n","}\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":13,"statement_ids":[13],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.7920676Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.9184099Z","execution_finish_time":"2024-12-12T14:39:16.7144224Z","parent_msg_id":"c47972cc-56fd-46a9-80fe-da0d20234a5d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 13, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/context.py:113: FutureWarning: Deprecated in 3.0.0. 
Use SparkSession.builder.getOrCreate() instead.\n"]}],"execution_count":9,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"fd229c4c-6376-4f4b-89c3-14f78822eef8"},{"cell_type":"code","source":["#build the arguments for zingg\n","args = Arguments()\n","# Set the modelid and the zingg dir. You can use this as is\n","args.setModelId(modelId)\n","args.setZinggDir(zinggDir)\n","print(args)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":14,"statement_ids":[14],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.916886Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:17.2999881Z","execution_finish_time":"2024-12-12T14:39:17.5431547Z","parent_msg_id":"c783d3fd-b7fa-4591-9771-32d42753ddd9"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 14, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\n"]}],"execution_count":10,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"f92fe414-811a-4e02-b11e-9711539d1786"},{"cell_type":"code","source":["# Import pandas\n","import pandas as pd\n","\n","# Define the schema (optional for validation)\n","schema = [\"id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", \"ssn\"]\n","\n","# Load the CSV file\n","data = pd.read_csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\")\n","\n","# Ensure column names match the schema\n","data.columns = schema # Adjust only if the file's column names differ\n","\n","# Display the 
data\n","data.head()\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":15,"statement_ids":[15],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.0524493Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:18.126005Z","execution_finish_time":"2024-12-12T14:39:19.6523511Z","parent_msg_id":"619a3f46-252d-4b59-849e-69081583ed29"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 15, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":37,"data":{"text/plain":" id fname lname stNo add1 add2 \\\n0 rec-1021-dup-0 thomas george 1 mcmanus place stoney creek \n1 rec-1021-org thomas george 1 mcmanus place north turramurra \n2 rec-1022-dup-0 jackson eglinton 840 fowles street mountview \n3 rec-1022-dup-1 jackson eglinton 840 fowles street moun tjiew \n4 rec-1022-dup-2 jackson eglinton 840 fowles street mou nview \n\n city state dob ssn \n0 3130 sa 19630225 5460534 \n1 3130 sa 19630225 5460534 \n2 2803 sa 19830807 2932837 \n3 2830 sa 19830807 2932837 \n4 2830 sa 19830807 2932837 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idfnamelnamestNoadd1add2citystatedobssn
0rec-1021-dup-0thomasgeorge1mcmanus placestoney creek3130sa196302255460534
1rec-1021-orgthomasgeorge1mcmanus placenorth turramurra3130sa196302255460534
2rec-1022-dup-0jacksoneglinton840fowles streetmountview2803sa198308072932837
3rec-1022-dup-1jacksoneglinton840fowles streetmoun tjiew2830sa198308072932837
4rec-1022-dup-2jacksoneglinton840fowles streetmou nview2830sa198308072932837
\n
"},"metadata":{}}],"execution_count":11,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a76f4324-ff22-46e1-81b5-16f97ab2835d"},{"cell_type":"code","source":["schema = \"rec_id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, dob string, ssn string\"\n","inputPipe = CsvPipe(\"testFebrl\", \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\", schema)\n","\n","args.setData(inputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":16,"statement_ids":[16],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.2025787Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:20.2434395Z","execution_finish_time":"2024-12-12T14:39:20.4955338Z","parent_msg_id":"5c8d332f-c5a9-4782-8aa7-923604a75d86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 16, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["set schema \n"]}],"execution_count":12,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d9ed37ff-f408-4f87-bda0-161ad35946fb"},{"cell_type":"code","source":["#setting outputpipe in 'args'\n","outputPipe = CsvPipe(\"resultOutput\", 
\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")\n","args.setOutput(outputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":17,"statement_ids":[17],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.3319598Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.0521349Z","execution_finish_time":"2024-12-12T14:39:21.3077047Z","parent_msg_id":"edd9e63e-2f5a-41f8-aec9-be73e860542d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 17, Finished, Available, Finished)"},"metadata":{}}],"execution_count":13,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"3c49f24d-2f15-43e6-8c73-7b77c1199845"},{"cell_type":"code","source":["# Set field definitions\n","rec_id = FieldDefinition(\"rec_id\", \"string\", MatchType.EXACT) # ID should use exact match\n","fname = FieldDefinition(\"fname\", \"string\", MatchType.FUZZY) # First Name\n","lname = FieldDefinition(\"lname\", \"string\", MatchType.FUZZY) # Last Name\n","stNo = FieldDefinition(\"stNo\", \"string\", MatchType.FUZZY) # Street Number\n","add1 = FieldDefinition(\"add1\", \"string\", MatchType.FUZZY) # Address Line 1\n","add2 = FieldDefinition(\"add2\", \"string\", MatchType.FUZZY) # Address Line 2\n","city = FieldDefinition(\"city\", \"string\", MatchType.FUZZY) # City\n","state = FieldDefinition(\"state\", \"string\", MatchType.FUZZY) # State\n","dob = FieldDefinition(\"dob\", \"string\", MatchType.EXACT) # Date of Birth (prefer exact match)\n","ssn = FieldDefinition(\"ssn\", \"string\", MatchType.EXACT) # SSN (should use exact match)\n","\n","# Create the field definitions list\n","fieldDefs = [rec_id, fname, lname, stNo, add1, 
add2, city, state, dob, ssn]\n","\n","# Set field definitions in args\n","args.setFieldDefinition(fieldDefs)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":18,"statement_ids":[18],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.4720722Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.8641221Z","execution_finish_time":"2024-12-12T14:39:22.1346071Z","parent_msg_id":"71227dea-6926-4e14-9e66-501b8515fa5a"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 18, Finished, Available, Finished)"},"metadata":{}}],"execution_count":14,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"76edaab7-d705-4d05-adaa-298b48f87ae6"},{"cell_type":"code","source":["# The numPartitions define how data is split across the cluster. 
\n","# Please change the following as per your data and cluster size by referring to the docs.\n","\n","args.setNumPartitions(4)\n","args.setLabelDataSampleSize(0.5)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":19,"statement_ids":[19],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.5771016Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:22.6870105Z","execution_finish_time":"2024-12-12T14:39:23.1094802Z","parent_msg_id":"133bf47a-3e2c-4a69-b874-b68bd3fd0f94"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 19, Finished, Available, Finished)"},"metadata":{}}],"execution_count":15,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ea3a596e-0571-4149-9b5b-d8357226d90c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"findTrainingData\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","print(args)\n","print(options)\n","print(zingg)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":20,"statement_ids":[20],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.7720589Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:23.6806377Z","execution_finish_time":"2024-12-12T14:39:40.4666332Z","parent_msg_id":"88db0a89-5777-4e74-92c3-15e9a461056f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 20, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'findTrainingData']\narguments for client options are ['--phase', 'findTrainingData', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n\n\n\n"]}],"execution_count":16,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"92238689-3e1c-4b32-9802-c59c714aa6d2"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"label\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.init()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":21,"statement_ids":[21],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.8921439Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.0118438Z","execution_finish_time":"2024-12-12T14:39:41.2588634Z","parent_msg_id":"9f835445-3575-444e-be68-698c87047cfa"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 21, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'label']\narguments for client options are ['--phase', 'label', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":17,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"b30911c2-9663-4260-8952-c9e5e0d668ea"},{"cell_type":"code","source":["# get candidate pairs\n","candidate_pairs_pd = getPandasDfFromDs(zingg.getUnmarkedRecords())\n"," \n","# if no candidate pairs, run job and 
wait\n","if candidate_pairs_pd.shape[0] == 0:\n"," print('No unlabeled candidate pairs found. Run findTraining job ...')\n","\n","else:\n"," # get list of pairs (as identified by z_cluster) to label \n"," z_clusters = list(np.unique(candidate_pairs_pd['z_cluster'])) \n","\n"," # identify last reviewed cluster\n"," last_z_cluster = '' # none yet\n","\n"," # print candidate pair stats\n"," print('{0} candidate pairs found for labeling'.format(len(z_clusters)))"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":22,"statement_ids":[22],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.1173535Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.8216531Z","execution_finish_time":"2024-12-12T14:39:44.3102558Z","parent_msg_id":"6d386eec-27ed-4ac8-8c59-e45bcfa62cc5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 22, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["15 candidate pairs found for labeling\n"]}],"execution_count":18,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e303305a-e747-4807-a788-beecde020545"},{"cell_type":"code","source":["# Label Training Set\n","\n","# define variable to avoid duplicate saves\n","ready_for_save = False\n","print(candidate_pairs_pd)\n","\n","# user-friendly labels and corresponding zingg numerical value\n","# (the order in the dictionary affects how displayed below)\n","LABELS = {\n"," 'Uncertain':2,\n"," 'Match':1,\n"," 'No Match':0 \n"," }\n","\n","# GET CANDIDATE PAIRS\n","# ========================================================\n","#candidate_pairs_pd = get_candidate_pairs()\n","n_pairs = 
int(candidate_pairs_pd.shape[0]/2)\n","# ========================================================\n","\n","# DEFINE IPYWIDGET DISPLAY\n","# ========================================================\n","display_pd = candidate_pairs_pd.drop(\n"," labels=[\n"," 'z_zid', 'z_prediction', 'z_score', 'z_isMatch', 'z_zsource'\n"," ], \n"," axis=1)\n","\n","# define header to be used with each displayed pair\n","html_prefix = \"

\"\n","html_suffix = \"

\"\n","header = widgets.HTML(value=f\"{html_prefix}\" + \"
\".join([str(i)+\"  \" for i in display_pd.columns.to_list()]) + f\"
{html_suffix}\")\n","\n","# initialize display\n","vContainers = []\n","vContainers.append(widgets.HTML(value=f'

Indicate if each of the {n_pairs} record pairs is a match or not

'))\n","\n","# for each set of pairs\n","for n in range(n_pairs):\n","\n"," # get candidate records\n"," candidate_left = display_pd.loc[2*n].to_list()\n"," print(candidate_left)\n"," candidate_right = display_pd.loc[(2*n)+1].to_list()\n"," print(candidate_right)\n","\n"," # define grid to hold values\n"," html = ''\n","\n"," for i in range(display_pd.shape[1]):\n","\n"," # get column name\n"," column_name = display_pd.columns[i]\n","\n"," # if field is image\n"," if column_name == 'image_path':\n","\n"," # define row header\n"," html += ''\n"," html += 'image'\n","\n"," # read left image to encoded string\n"," l_endcode = ''\n"," if candidate_left[i] != '':\n"," with open(candidate_left[i], \"rb\") as l_file:\n"," l_encode = base64.b64encode( l_file.read() ).decode()\n","\n"," # read right image to encoded string\n"," r_encode = ''\n"," if candidate_right[i] != '':\n"," with open(candidate_right[i], \"rb\") as r_file:\n"," r_encode = base64.b64encode( r_file.read() ).decode() \n","\n"," # present images\n"," html += f''\n"," html += f''\n"," html += ''\n","\n"," elif column_name != 'image_path': # display text values\n","\n"," if column_name == 'z_cluster': z_cluster = candidate_left[i]\n","\n"," html += ''\n"," html += f'{column_name}'\n"," html += f'{str(candidate_left[i])}'\n"," html += f'{str(candidate_right[i])}'\n"," html += ''\n","\n"," # insert data table\n"," table = widgets.HTML(value=f''+html+'
')\n"," z_cluster = None\n","\n"," # assign label options to pair\n"," label = widgets.ToggleButtons(\n"," options=LABELS.keys(), \n"," button_style='info'\n"," )\n","\n"," # define blank line between displayed pair and next\n"," blankLine=widgets.HTML(value='
')\n","\n"," # append pair, label and blank line to widget structure\n"," vContainers.append(widgets.VBox(children=[table, label, blankLine]))\n","\n","# present widget\n","display(widgets.VBox(children=vContainers))\n","# ========================================================\n","\n","# mark flag to allow save \n","ready_for_save = True\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":23,"statement_ids":[23],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.2971586Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:44.8516182Z","execution_finish_time":"2024-12-12T14:39:45.7453958Z","parent_msg_id":"f4eac308-98ad-4ac2-b881-a6f991545aca"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 23, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":[" z_zid z_cluster z_prediction z_score z_isMatch rec_id \\\n0 34 1734014375837:0 -1.0 0.0 -1 rec-1022-dup-1 \n1 17 1734014375837:0 -1.0 0.0 -1 rec-1029-dup-1 \n2 56 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n3 26 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n4 47 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n5 17 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n6 59 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n7 29 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n8 32 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n9 2 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n10 33 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n11 3 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n12 41 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n13 11 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n14 57 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n15 27 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n16 47 1734014375837:8 -1.0 0.0 -1 rec-1029-dup-1 \n17 34 1734014375837:8 -1.0 0.0 -1 rec-1022-dup-1 \n18 46 
1734007288465:0 -1.0 0.0 -1 rec-1029-dup-0 \n19 24 1734007288465:0 -1.0 0.0 -1 rec-1031-dup-0 \n20 48 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n21 18 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n22 24 1734007288465:12 -1.0 0.0 -1 rec-1031-dup-0 \n23 1 1734007288465:12 -1.0 0.0 -1 rec-1021-dup-0 \n24 37 1734007288465:3 -1.0 0.0 -1 rec-1022-dup-4 \n25 20 1734007288465:3 -1.0 0.0 -1 rec-1029-dup-4 \n26 53 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n27 23 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n28 46 1734007288465:8 -1.0 0.0 -1 rec-1029-dup-0 \n29 1 1734007288465:8 -1.0 0.0 -1 rec-1021-dup-0 \n\n fname lname stNo add1 add2 \\\n0 jackson eglinton 840 fowles street moun tjiew \n1 sachin stephenson 81 rose scott circuit cordoba manor \n2 brooklyn naar-caftenas 210 duffy street tourist park \n3 brooklyn naar-caftenas 210 duffy street tourist park \n4 sachin stephenson 81 rose scott circuit cordoba manor \n5 sachin stephenson 81 rose scott circuit cordoba manor \n6 jasmine chang 210 magnolia drive sunset valley \n7 jasmine chang 210 magnolia drive sunset valley \n8 thomas george 1 mcmanus place north turramurra \n9 thomas george 1 mcmanus place north turramurra \n10 jackson eglinton 840 fowles street mountview \n11 jackson eglinton 840 fowles street mountview \n12 xani green 2 phill ip avenue abbey green \n13 xani green 2 phill ip avenue abbey green \n14 zachary mccarthy 134 teal street greenwood \n15 zachary mccarthy 134 teal street greenwood \n16 sachin stephenson 81 rose scott circuit cordoba manor \n17 jackson eglinton 840 fowles street moun tjiew \n18 kylee stephenson 81 rose scott circuit cordoba anor \n19 samantha sabieray 68 quandong street wattle brae \n20 annalise stephenson 81 rose scott circuit cordoba manor \n21 annalise stephenson 81 rose scott circuit cordoba manor \n22 samantha sabieray 68 quandong street wattle brae \n23 thomas george 1 mcmanus place stoney creek \n24 jackson eglinton 840 fowles street mountv iew \n25 kylee stephenson 81 rose scott 
circuit cordoba manor \n26 emma crossman 53 mcdowall place kellhaven \n27 emma crossman 53 mcdowall place kellhaven \n28 kylee stephenson 81 rose scott circuit cordoba anor \n29 thomas george 1 mcmanus place stoney creek \n\n city state dob ssn z_zsource \n0 2830 sa 19830807 2932837 testFebrl \n1 4226 vic 19461101 4783085 testFebrl \n2 2481 nsw 19840802 3624304 testFebrl \n3 2481 nsw 19840802 3624304 testFebrl \n4 4226 vic 19461101 4783085 testFebrl \n5 4226 vic 19461101 4783085 testFebrl \n6 3021 vic 19930203 4562381 testFebrl \n7 3021 vic 19930203 4562381 testFebrl \n8 3130 sa 19630225 5460534 testFebrl \n9 3130 sa 19630225 5460534 testFebrl \n10 2803 sa 19830807 2932837 testFebrl \n11 2803 sa 19830807 2932837 testFebrl \n12 5108 nsw 19390410 9201057 testFebrl \n13 5108 nsw 19390410 9201057 testFebrl \n14 6024 wa 19860219 3241102 testFebrl \n15 6024 wa 19860219 3241102 testFebrl \n16 4226 vic 19461101 4783085 testFebrl \n17 2830 sa 19830807 2932837 testFebrl \n18 4226 vic 19461101 4783085 testFebrl \n19 4019 wa 19590807 2863290 testFebrl \n20 4226 vic 19461101 4783085 testFebrl \n21 4226 vic 19461101 4783085 testFebrl \n22 4019 wa 19590807 2863290 testFebrl \n23 3130 sa 19630225 5460534 testFebrl \n24 2830 sa 19830807 2932837 testFebrl \n25 4226 vic 19461101 4783085 testFebrl \n26 5608 vic 19391027 3561186 testFebrl \n27 5608 vic 19391027 3561186 testFebrl \n28 4226 vic 19461101 4783085 testFebrl \n29 3130 sa 19630225 5460534 testFebrl \n['1734014375837:0', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734014375837:0', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:1', 'rec-1032-dup-0', ' brooklyn', ' naar-caftenas', ' 210', ' duffy street', ' tourist park', ' 2481', ' nsw', ' 19840802', ' 3624304']\n['1734014375837:1', 'rec-1032-dup-0', 'brooklyn', 'naar-caftenas', '210', 'duffy street', 
'tourist park', '2481', 'nsw', '19840802', '3624304']\n['1734014375837:12', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:12', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:16', 'rec-1034-org', ' jasmine', ' chang', ' 210', ' magnolia drive', ' sunset valley', ' 3021', ' vic', ' 19930203', ' 4562381']\n['1734014375837:16', 'rec-1034-org', 'jasmine', 'chang', '210', 'magnolia drive', 'sunset valley', '3021', 'vic', '19930203', '4562381']\n['1734014375837:2', 'rec-1021-org', ' thomas', ' george', ' 1', ' mcmanus place', ' north turramurra', ' 3130', ' sa', ' 19630225', ' 5460534']\n['1734014375837:2', 'rec-1021-org', 'thomas', 'george', '1', 'mcmanus place', 'north turramurra', '3130', 'sa', '19630225', '5460534']\n['1734014375837:3', 'rec-1022-dup-0', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountview', ' 2803', ' sa', ' 19830807', ' 2932837']\n['1734014375837:3', 'rec-1022-dup-0', 'jackson', 'eglinton', '840', 'fowles street', 'mountview', '2803', 'sa', '19830807', '2932837']\n['1734014375837:4', 'rec-1026-dup-0', ' xani', ' green', ' 2', ' phill ip avenue', ' abbey green', ' 5108', ' nsw', ' 19390410', ' 9201057']\n['1734014375837:4', 'rec-1026-dup-0', 'xani', 'green', '2', 'phill ip avenue', 'abbey green', '5108', 'nsw', '19390410', '9201057']\n['1734014375837:7', 'rec-1033-org', ' zachary', ' mccarthy', ' 134', ' teal street', ' greenwood', ' 6024', ' wa', ' 19860219', ' 3241102']\n['1734014375837:7', 'rec-1033-org', 'zachary', 'mccarthy', '134', 'teal street', 'greenwood', '6024', 'wa', '19860219', '3241102']\n['1734014375837:8', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:8', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' 
moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:0', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:0', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:1', 'rec-1029-dup-2', ' annalise', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:1', 'rec-1029-dup-2', 'annalise', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:12', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:12', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n['1734007288465:3', 'rec-1022-dup-4', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountv iew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:3', 'rec-1029-dup-4', 'kylee', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:4', 'rec-1031-org', ' emma', ' crossman', ' 53', ' mcdowall place', ' kellhaven', ' 5608', ' vic', ' 19391027', ' 3561186']\n['1734007288465:4', 'rec-1031-org', 'emma', 'crossman', '53', 'mcdowall place', 'kellhaven', '5608', 'vic', '19391027', '3561186']\n['1734007288465:8', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:8', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n"]},{"output_type":"display_data","data":{"text/plain":"VBox(children=(HTML(value='

Indicate if each of the 15 record pairs is a match or not

'), VBox(chil…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"01ee458406bc4bc7aae55eb99c0b504b"}},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":24,"statement_ids":[24],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:07.0951338Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:07.7673389Z","execution_finish_time":"2024-12-12T14:40:08.7466527Z","parent_msg_id":"bdc81fed-0318-4c1e-9a05-c19863f74f86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 24, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":25,"statement_ids":[25],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:11.2518685Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:11.8231998Z","execution_finish_time":"2024-12-12T14:40:12.0645572Z","parent_msg_id":"875bd6d4-812c-4287-89ec-65b08d5b15f7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 25, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":26,"statement_ids":[26],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:18.2988145Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:18.8789311Z","execution_finish_time":"2024-12-12T14:40:19.1201871Z","parent_msg_id":"5db081fe-5e88-4519-a2c6-fcc370fbfafc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 
26, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":27,"statement_ids":[27],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:42.2210094Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:42.7984267Z","execution_finish_time":"2024-12-12T14:40:43.0525888Z","parent_msg_id":"048f0931-0eaf-4be3-ae1f-cbd4c06d2e9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 27, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":28,"statement_ids":[28],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:43.7678985Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:44.3138165Z","execution_finish_time":"2024-12-12T14:40:44.5580052Z","parent_msg_id":"462f3847-e026-4744-9b81-4435f1c8ad9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 28, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":29,"statement_ids":[29],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:55.8774777Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:56.4326849Z","execution_finish_time":"2024-12-12T14:40:56.7235357Z","parent_msg_id":"16b1eb37-22d6-440f-85ff-57c744336e9f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 29, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":30,"statement_ids":[30],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:03.1431734Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:03.6780666Z","execution_finish_time":"2024-12-12T14:41:03.9184142Z","parent_msg_id":"08566780-4456-4005-be13-646d0df8ca23"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 30, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":31,"statement_ids":[31],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:12.9413749Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:13.5109925Z","execution_finish_time":"2024-12-12T14:41:13.7677758Z","parent_msg_id":"37011b0e-d098-4aa2-b74b-9f7ed8e5092f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 31, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":32,"statement_ids":[32],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:23.0819227Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:23.7271973Z","execution_finish_time":"2024-12-12T14:41:23.9748964Z","parent_msg_id":"00b11703-7206-4822-8eeb-ea326f892b1e"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 32, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":33,"statement_ids":[33],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:31.7381977Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:32.2866112Z","execution_finish_time":"2024-12-12T14:41:32.5342842Z","parent_msg_id":"65cbb945-0a65-4942-bfaa-233cbc4641ee"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 33, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":34,"statement_ids":[34],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:39.941469Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:40.5983996Z","execution_finish_time":"2024-12-12T14:41:40.848122Z","parent_msg_id":"0f447c56-a165-436a-b7a1-7d5096f3f966"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 34, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":35,"statement_ids":[35],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:51.2539429Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:51.8238466Z","execution_finish_time":"2024-12-12T14:41:52.075655Z","parent_msg_id":"09ec44eb-26ef-4d82-b198-22ab624c9ecc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 35, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":36,"statement_ids":[36],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:02.26967Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:02.8636434Z","execution_finish_time":"2024-12-12T14:42:03.1209762Z","parent_msg_id":"d701ef7e-6c03-4f6f-bccc-3d1dd11d246c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 36, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":37,"statement_ids":[37],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:11.285235Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:11.8311926Z","execution_finish_time":"2024-12-12T14:42:12.0650602Z","parent_msg_id":"d3820343-a606-479d-bcfe-9c1da6f2a104"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 37, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":38,"statement_ids":[38],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:20.7858335Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:21.3273077Z","execution_finish_time":"2024-12-12T14:42:21.6218612Z","parent_msg_id":"744f8a1d-0658-4fe8-ba1a-c225cb1f2bf7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 38, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":39,"statement_ids":[39],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:30.8794009Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:31.4177187Z","execution_finish_time":"2024-12-12T14:42:31.6735656Z","parent_msg_id":"34e08c99-8c30-4af2-8fae-fe81e0f51e1b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 39, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":40,"statement_ids":[40],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:41.3482104Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:41.8980878Z","execution_finish_time":"2024-12-12T14:42:42.1374491Z","parent_msg_id":"3daf28a4-fbc8-4efd-a361-7cb4a2d489b4"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 40, Finished, Available, 
Finished)"},"metadata":{}}],"execution_count":19,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"2fbe3b6c-9a71-4c3f-8cd6-af6eedad956c"},{"cell_type":"code","source":["notebookutils.fs.ls(\"/\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":5,"statement_ids":[5],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:37:55.2180433Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:05.3684078Z","execution_finish_time":"2024-12-12T14:38:08.0399328Z","parent_msg_id":"340db6fd-15b9-49e4-b8d4-124a4cc2f05d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 5, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":7,"data":{"text/plain":"[FileInfo(path=abfss://e803987a-98b6-445f-815c-3d15c2c46877@onelake.dfs.fabric.microsoft.com/36ef8bc2-c67a-4512-b060-e25489729c71, name=36ef8bc2-c67a-4512-b060-e25489729c71, size=0)]"},"metadata":{}}],"execution_count":1,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"77417f1d-c2a6-4160-9b9c-12b0fbee5839"},{"cell_type":"code","source":["if not ready_for_save:\n"," print('No labels have been assigned. 
Run the previous cell to create candidate pairs and assign labels to them before re-running this cell.')\n","\n","else:\n","\n"," # ASSIGN LABEL VALUE TO CANDIDATE PAIRS IN DATAFRAME\n"," # ========================================================\n"," # for each pair in displayed widget\n"," for pair in vContainers[1:]:\n","\n"," # get pair and assigned label\n"," html_content = pair.children[1].get_interact_value() # the displayed pair as html\n"," user_assigned_label = pair.children[1].get_interact_value() # the assigned label\n","\n"," # extract candidate pair id from html pair content\n"," start = pair.children[0].value.find('data-title=\"')\n"," if start > 0: \n"," start += len('data-title=\"') \n"," end = pair.children[0].value.find('\"', start+2)\n"," pair_id = pair.children[0].value[start:end]\n","\n","\n","\n"," # assign label to candidate pair entry in dataframe\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster']==pair_id, 'z_isMatch'] = LABELS.get(user_assigned_label)\n"," # ========================================================\n","\n"," # SAVE LABELED DATA TO ZINGG FOLDER\n"," # ========================================================\n"," # make target directory if needed\n"," notebookutils.fs.mkdirs(MARKED_DIR)\n"," \n"," # save label assignments\n"," # save labels\n"," zingg.writeLabelledOutputFromPandas(candidate_pairs_pd,args)\n","\n"," # count labels accumulated\n"," marked_pd_df = getPandasDfFromDs(zingg.getMarkedRecords())\n"," n_pos, n_neg, n_tot = count_labeled_pairs(marked_pd_df)\n"," print(f'You have accumulated {n_pos} pairs labeled as positive matches.')\n"," print(f'You have accumulated {n_neg} pairs labeled as not matches.')\n"," print(\"If you need more pairs to label, re-run the cell for 'findTrainingData'\")\n"," # ======================================================== \n","\n"," # save completed\n"," ready_for_save = 
False"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":41,"statement_ids":[41],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:43:16.772682Z","session_start_time":null,"execution_start_time":"2024-12-12T14:43:17.381583Z","execution_finish_time":"2024-12-12T14:43:31.9046383Z","parent_msg_id":"ed09275a-e109-4cb1-802d-3909c879a2ad"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 41, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py:147: UserWarning: DataFrame constructor is internal. Do not directly use it.\n warnings.warn(\"DataFrame constructor is internal. Do not directly use it.\")\n"]},{"output_type":"stream","name":"stdout","text":["You have accumulated 9 pairs labeled as positive matches.\nYou have accumulated 6 pairs labeled as not matches.\nIf you need more pairs to label, re-run the cell for 'findTrainingData'\n"]}],"execution_count":20,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9795bb7f-cd3e-41c5-98fd-6341129df8e3"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"trainMatch\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, 
options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":42,"statement_ids":[42],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:49:47.2575582Z","session_start_time":null,"execution_start_time":"2024-12-12T14:49:47.8553896Z","execution_finish_time":"2024-12-12T14:51:37.5141836Z","parent_msg_id":"f77d784e-0276-440c-8113-c6d060096abf"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 42, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'trainMatch']\narguments for client options are ['--phase', 'trainMatch', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":21,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"71928547-bc82-4653-960f-6c376524f651"},{"cell_type":"code","source":["outputDF = spark.read.csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/part-00000-d624fac4-b80c-4f8d-aebc-5d5faf351b8f-c000.csv\")\n","\n","colNames = [\"z_minScore\", \"z_maxScore\", \"z_cluster\", \"rec_id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", 
\"ssn\"]\n","outputDF.toDF(*colNames).show(100)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":47,"statement_ids":[47],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:05:16.9588841Z","session_start_time":null,"execution_start_time":"2024-12-12T15:05:17.7549538Z","execution_finish_time":"2024-12-12T15:05:19.4042746Z","parent_msg_id":"f45225e4-62b8-4836-b7d8-bf0d91575730"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 47, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n| z_minScore| z_maxScore|z_cluster| rec_id| fname| lname|stNo| add1| add2|city|state| dob| ssn|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n|0.9999999999995524|0.9999999999995524| 26|rec-1032-dup-0|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999995358|0.9999999999995358| 24|rec-1031-dup-0|samantha| sabieray| 68| quandong street| wattle brae|4019| wa|19590807|2863290|\n|0.9999999977273273|0.9999999977273273| 2| rec-1021-org| thomas| george| 1| mcmanus place|north turramurra|3130| sa|19630225|5460534|\n|0.9999999999997746|0.9999999999997746| 15| rec-1028-org|eglinton| NULL| 24| curriecrescent| woorniyan|3749| qld|19180205|9341716|\n|0.9999999999991117|0.9999999999991117| 18|rec-1029-dup-2|annalise| stephenson| 81|rose scott circuit| cordoba manor|4226| vic|19461101|4783085|\n|0.9999999999991869|0.9999999999991869| 29| rec-1034-org| jasmine| chang| 210| magnolia drive| sunset valley|3021| 
vic|19930203|4562381|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999988902|0.9999999999988902| 3|rec-1022-dup-0| jackson| eglinton| 840| fowles street| mountview|2803| sa|19830807|2932837|\n|0.9999999999994619|0.9999999999994619| 19|rec-1029-dup-3| kylee| turale| 81| cordoba manor| ashfield|4226| vic|19461101|4783085|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999969422861|0.9999999969422861| 1|rec-1021-dup-0| thomas| george| 1| mcmanus place| stoney creek|3130| sa|19630225|5460534|\n|0.9999999999990814|0.9999999999990814| 8| rec-1023-org| gianni| matson| 701| willis street| boonoobloo|3101| vic|19410111|2540080|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999994932|0.9999999999994932| 23| rec-1031-org| emma| crossman| 53| mcdowall place| kellhaven|5608| vic|19391027|3561186|\n|0.9999999999995524|0.9999999999995524| 25| rec-1032-org|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999973147|0.9999999999973147| 5|rec-1022-dup-2| jackson| eglinton| 840| fowles street| mou nview|2830| sa|19830807|2932837|\n|0.9999999999991869|0.9999999999991869| 28|rec-1034-dup-0| jasmine| chang| 210| magnolia drive| sunset valley|3021| vic|19930203|4562381|\n|0.9999999988648708|0.9999999988648708| 0| rec-1020-org| blake| ryan| 4| starling place| berkeley vlge|5412| 
nsw|19271027|2402765|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n\n"]}],"execution_count":26,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"383bac89-e461-431f-ba14-5ab59941942c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"generateDocs\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":48,"statement_ids":[48],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:06:42.854029Z","session_start_time":null,"execution_start_time":"2024-12-12T15:06:43.5186144Z","execution_finish_time":"2024-12-12T15:06:46.2120472Z","parent_msg_id":"f73996c7-08d7-4621-b654-4975b23615ab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 48, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'generateDocs']\narguments for client options are ['--phase', 'generateDocs', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":27,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"da00dc40-2163-4247-bfef-21fa918ddfdd"},{"cell_type":"code","source":["DOCS_DIR = zinggDir + \"/\" + modelId + 
\"/docs/\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":50,"statement_ids":[50],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:24.1740612Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:24.7585436Z","execution_finish_time":"2024-12-12T15:11:25.0621234Z","parent_msg_id":"808875a7-ca97-42ba-b75c-ea92d72410a5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 50, Finished, Available, Finished)"},"metadata":{}}],"execution_count":29,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0d4e3074-53a5-44a0-9b48-8f0f76a7c950"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"model.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":51,"statement_ids":[51],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:35.8141287Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:36.3540639Z","execution_finish_time":"2024-12-12T15:11:36.652124Z","parent_msg_id":"81153656-b2b8-4430-bc2a-d385f917e9a2"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 51, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n Zingg Model Documentation\n \n\n\n\n

\n \n\t \n\t\t \t\n\t\t\t\t\n\t\t \t\n\t \n
Unmarked 0/15, Marked 15/15 (9 Matches, 6 Non-Matches, 0 Unsure)
\n

\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Cluster z_score z_isMatch rec_id fname lname stNo add1 add2 city state dob ssn z_zsource
\n 1734007288465:0\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:1\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-2\n\n \n \n annalise\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-2\n\n \n \nannalise\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:12\n \n0\n\n \n \n0\n\n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:3\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-4\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountv iew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-4\n\n \n \nkylee\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:4\n \n0\n\n \n \n1\n\n \n \nrec-1031-org\n\n \n \n emma\n\n \n \n crossman\n\n \n \n 53\n\n \n \n mcdowall place\n\n \n \n kellhaven\n\n \n \n 5608\n\n \n \n vic\n\n \n \n 19391027\n\n \n \n 3561186\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-org\n\n \n \nemma\n\n \n \ncrossman\n\n \n \n53\n\n \n \nmcdowall place\n\n \n \nkellhaven\n\n \n \n5608\n\n \n \nvic\n\n \n \n19391027\n\n \n \n3561186\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:0\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:1\n \n0\n\n \n \n1\n\n \n \nrec-1032-dup-0\n\n \n \nbrooklyn\n\n \n \nnaar-caftenas\n\n \n \n210\n\n \n \nduffy street\n\n \n \ntourist park\n\n \n \n2481\n\n \n \nnsw\n\n \n \n19840802\n\n \n \n3624304\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1032-dup-0\n\n \n \n brooklyn\n\n \n \n naar-caftenas\n\n \n \n 210\n\n \n \n duffy street\n\n \n \n tourist park\n\n \n \n 2481\n\n \n \n nsw\n\n \n \n 19840802\n\n \n \n 3624304\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:12\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:16\n \n0\n\n \n \n1\n\n \n \nrec-1034-org\n\n \n \n jasmine\n\n \n \n chang\n\n \n \n 210\n\n \n \n magnolia drive\n\n \n \n sunset valley\n\n \n \n 3021\n\n \n \n vic\n\n \n \n 19930203\n\n \n \n 4562381\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1034-org\n\n \n \njasmine\n\n \n \nchang\n\n \n \n210\n\n \n \nmagnolia drive\n\n \n \nsunset valley\n\n \n \n3021\n\n \n \nvic\n\n \n \n19930203\n\n \n \n4562381\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:2\n \n0\n\n \n \n1\n\n \n \nrec-1021-org\n\n \n \n thomas\n\n \n \n george\n\n \n \n 1\n\n \n \n mcmanus place\n\n \n \n north turramurra\n\n \n \n 3130\n\n \n \n sa\n\n \n \n 19630225\n\n \n \n 5460534\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-org\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nnorth turramurra\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:3\n \n0\n\n \n \n1\n\n \n \nrec-1022-dup-0\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountview\n\n \n \n 2803\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-0\n\n \n \njackson\n\n \n \neglinton\n\n \n \n840\n\n \n \nfowles street\n\n \n \nmountview\n\n \n \n2803\n\n \n \nsa\n\n \n \n19830807\n\n \n \n2932837\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:4\n \n0\n\n \n \n1\n\n \n \nrec-1026-dup-0\n\n \n \n xani\n\n \n \n green\n\n \n \n 2\n\n \n \n phill ip avenue\n\n \n \n abbey green\n\n \n \n 5108\n\n \n \n nsw\n\n \n \n 19390410\n\n \n \n 9201057\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1026-dup-0\n\n \n \nxani\n\n \n \ngreen\n\n \n \n2\n\n \n \nphill ip avenue\n\n \n \nabbey green\n\n \n \n5108\n\n \n \nnsw\n\n \n \n19390410\n\n \n \n9201057\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:7\n \n0\n\n \n \n1\n\n \n \nrec-1033-org\n\n \n \n zachary\n\n \n \n mccarthy\n\n \n \n 134\n\n \n \n teal street\n\n \n \n greenwood\n\n \n \n 6024\n\n \n \n wa\n\n \n \n 19860219\n\n \n \n 3241102\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1033-org\n\n \n \nzachary\n\n \n \nmccarthy\n\n \n \n134\n\n \n \nteal street\n\n \n \ngreenwood\n\n \n \n6024\n\n \n \nwa\n\n \n \n19860219\n\n \n \n3241102\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n \n\n

\n\n\n"},"metadata":{}}],"execution_count":30,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9e4ad578-f75f-4011-8027-dc565933adc6"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"data.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":52,"statement_ids":[52],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:13:39.3741915Z","session_start_time":null,"execution_start_time":"2024-12-12T15:13:39.95129Z","execution_finish_time":"2024-12-12T15:13:40.2508845Z","parent_msg_id":"e6afa7a6-fd1b-454d-af86-38b6e6686506"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 52, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n\tData Documentation\n\t\n\n\n\t\n\n\t\n\t\t\t\n\t\t\t\n\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\n\t
Field NameField TypeNullable
\n\t\t\t\t\trec_id\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tfname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tlname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstNo\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd1\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd2\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tcity\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstate\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tdob\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tssn\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\n\n\n"},"metadata":{}}],"execution_count":31,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e58aad4c-1ee3-4977-b211-ebeb9d7539c9"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"version_major":2,"version_minor":0,"state":{"0112614dd803438a986c77cfda539dba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cd7680c5c7d54872b46d824dfd45b61f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_04911938acd2486e8fc0ded740020ea1","style":"IPY_MODEL_ad77a508719f4730a16cf01475525150"}},"6f94a4de6db941189e6a0deabf52e2ad":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7f48a6c51c9f458a80deed26ea3b9011","IPY_MODEL_9efc44bbb2af482989a69577c7b793d0","IPY_MODEL_abc4ad768b3d4f75b3f6f8e3d9d3350d"],"layout":"IPY_MODEL_e0d2670f67e34eee81694ce7b7c97cd7"}},"0c26c8827bf54b95a4cc7d119b485e81":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e5b99552291e4649acf8760161e02ad9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"6a13045354274a089c720f0a3f6fc7b7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_a78ca3ab571448c09c99720e6914c9a5","IPY_MODEL_fd4beb5f2be94c609aed0730b98b9fea","IPY_MODEL_2019411034194afc8bea365fa7205623"],"layout":"IPY_MODEL_41e5e2f1dabe421d90c77a0af367cc74"}},"1a16c51638774862acb327afd5a6f057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ae4bd3e8f34741e7b87423cdaf49a198":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"01b2b8f50eb348cf9ee75f3145179cee":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8b71f2fe25b0404faedd772588744c33":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"7f48a6c51c9f458a80deed26ea3b9011":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_9f7543b4d79248bc8ecf6e9ce6bf31cf","style":"IPY_MODEL_241d4546ce8b4f0684be34c8b75eb58f"}},"d3bb974dd1f0490bb77dffaf8540d439":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_47e1703b3d45461f816b4ec1f8ea445a","style":"IPY_MODEL_8b71f2fe25b0404faedd772588744c33"}},"2266b285bd664631a0a6c9e89a35ed51":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"3af6c6b8d18d48ca89cbc4f5299f6f72":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e9d8900ddcf64682bbf5198fbf46f39d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7468229546d94bfcab6525edb9757637","tooltips":[],"style":"IPY_MODEL_f1bad4094ead437cbc0eda8372c538a8","icons":[]}},"63e74252206d4c5db3c7a350096b0435":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4cbbd9bb43ea4bcb82861e22c1478cf3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c26c8827bf54b95a4cc7d119b485e81","style":"IPY_MODEL_db63ca43d6934485987860bb1f441f29"}},"67d9530cacbf4bbe8144836c57e61acb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:81734014375837:8
rec_idrec-1029-dup-1rec-1022-dup-1
fname sachin jackson
lname stephenson eglinton
stNo 81 840
add1 rose scott circuit fowles street
add2 cordoba manor moun tjiew
city 4226 2830
state vic sa
dob 19461101 19830807
ssn 4783085 2932837
","layout":"IPY_MODEL_7862a64b0ced43e8b70b7f5684987936","style":"IPY_MODEL_2d427fa36cec488e8239a8c453efc375"}},"1829f914d5274fcc89106d626e3295de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7a6c3a89abf64a438aa69a6d0e63782e","IPY_MODEL_8b544a3eb42548698fec50307ca58cf0","IPY_MODEL_7ab4a49ee5cc4cd2bdc3a7b0cd066e29"],"layout":"IPY_MODEL_9d57f12f444b47b58f6982290bc17ba2"}},"d973662f8e8d4d80add362dc786e8325":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"ad77a508719f4730a16cf01475525150":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"39cadceacdbc4966a574c52a98c6260d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 6 record pairs is a match or not

","layout":"IPY_MODEL_5694a3ce6d8d4ae4b3022ded67aa7fd6","style":"IPY_MODEL_d973662f8e8d4d80add362dc786e8325"}},"8e9304290aab4a1fa38a89411af22922":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2d427fa36cec488e8239a8c453efc375":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9909b484567e49d3a2b619fec9e125b9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9fe8115b161a4a309887a31b449f2989":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"970014aa3a6b4acb981c239e49b5c8a1":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eedf22cb2361430099f8f6169cb418ea":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_f5e420d27b5d4c92bc8380c01cfa2151","IPY_MODEL_40544637e23545a1a6fc511777301f2d","IPY_MODEL_fcd49a0c3a1342b1bb6473cf90c1b88b"],"layout":"IPY_MODEL_f1be32a9a51445f98e99e3b4a2c697bb"}},"6225593e71364eb181cff48c1cfcfcc2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a78b5089adc74cd896d1e477251a4ac6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5306ed2302184ab8ba22c30999cb5572":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d1ca7f2a677e4e2783d6
60faee4c4701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_1f1ae689a00642b597a76f6721a06432","style":"IPY_MODEL_fe6677ee651742e1abf26212230c71af"}},"721f29e0f7664888a2936a3ceddafb6d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"23f62e8b7e2e4be1ae544202d2c1d38d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3fc421549e7425b815de2a3d01602d1","style":"IPY_MODEL_7f44c72c66414102acab1c2578025735"}},"4402fa32ec2e4f12afbd61344d431bcc":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"78889cdf217643fa9f4d114f1918b2f6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"083dbadeee3f4683a499f9b612768701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c847d55d401e46bba108bca1bf8a7770","style":"IPY_MODEL_efade4d483f24f349d3d478be973b355"}},"1e2bcb99927b4a8cb5c7dd4eaac39225":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0371cfc91c0d421ab01ddd16b3972743":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_3bda20edce274aa7b1a92b98914530e1","IPY_MODEL_ccbf1dffd785415594fd880aa5cc8edf","IPY_MODEL_498839735d8f40018aca7aac0da8f5c9"],"layout":"IPY_MODEL_25e1281b496a4a958955a4d9091ca382"}},"01ee458406bc4bc7aae55eb99c0b504b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_af7596b42e5c4b9da6a85846c55f2092","IPY_MODEL_e3697e92e3e04c82b865bc3328dcad2b","IPY_MODEL_4c7afd0822eb4871b7708acbfb040fbf","IPY_MODEL_5d8d51ddc216416cb12979d0f38aae5a","IPY_MODEL_4ddf0fd6818343a58cee87bd452691eb","IPY_MODEL_a8bf95eb6af447ee89f946a9b6b4f1a9","IPY_MODEL_0371cfc91c0d421ab01ddd16b3972743","IPY_MODEL_804f5f862a2547cc833f3f27c18d69de","IPY_MODEL_b95905218e04479b8cba30790100004b","IPY_MODEL_55172f1685204f24a3b38debc635c6b9","IPY_MODEL_b47d111ecdf142a9bf96dea7cc00f12e","IPY_MODEL_0096a2bb367e4410ab96be94878df836","IPY_MODEL_9f688658e0a84aab86fb4b6e9b14eeb5","IPY_MODEL_6a13045354274a089c720f0a3f6fc7b7","IPY_MODEL_6f94a4de6db941189e6a0deabf52e2ad","IPY_MODEL_1829f914d5274fcc89106d626e3295de"],"layout":"IPY_MODEL_ddcfc3d0e90741c0a6c0b67b47f6f53d"}},"5423e9abb08d4175a8c593b60b35ad8d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"952a9f160893406791ec1975a5af971f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc724d1ceb584472a158a91de7b17cae":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:41734014375837:4
rec_idrec-1026-dup-0rec-1026-dup-0
fname xanixani
lname greengreen
stNo 22
add1 phill ip avenuephill ip avenue
add2 abbey greenabbey green
city 51085108
state nswnsw
dob 1939041019390410
ssn 92010579201057
","layout":"IPY_MODEL_f596ee340faa4691abdef6d010ff513c","style":"IPY_MODEL_9e7440ae7f6844f3a8c084a8379df095"}},"f75d9074d0674656b77cb99efcbfe37d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"498839735d8f40018aca7aac0da8f5c9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3b9f4a35a1741cdab1b8127376790be","style":"IPY_MODEL_7ec772d0ae8d4365bd39d4a4b8050837"}},"942ce2043b974942801386f7fe813e59":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d7c93338fb5744a98060d36f29894737":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_29bb51c1b4b842d7992d0c6be6e582c8","style":"IPY_MODEL_5250e70ff02e4d219de6502a27b84357"}},"e23cfe9a93804558acc75418021aa409":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:01734014375837:0
rec_idrec-1022-dup-1rec-1029-dup-1
fname jacksonsachin
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 moun tjiewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_a36bb933f92c4ada82504e4c10570057","style":"IPY_MODEL_cbbfcbe143644072846912c9d8f1c6d7"}},"854564d76efa4e17b66c5e86ac9b8783":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_62d1842b557f49399311b9b573dac9d5","style":"IPY_MODEL_abea2c5d5ee14775a1e9c5a025bb83f2"}},"7ad966747291400d9013a2a2e2b26e10":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4c48892283394169b0911d6922a97058":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"56a4135e67644d0a83f0612cfe92fea8":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:161734014375837:16
rec_idrec-1034-orgrec-1034-org
fname jasminejasmine
lname changchang
stNo 210210
add1 magnolia drivemagnolia drive
add2 sunset valleysunset valley
city 30213021
state vicvic
dob 1993020319930203
ssn 45623814562381
","layout":"IPY_MODEL_4ebfc8728d2c4186a14ab0d9e52ca0c5","style":"IPY_MODEL_970014aa3a6b4acb981c239e49b5c8a1"}},"714d113c8c894968a03f8521e9c6bdf7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2019411034194afc8bea365fa7205623":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_08b9883f77f148c0be1916fbe711a94f","style":"IPY_MODEL_a6c854c673a54b54aa8f5894539a717c"}},"6020cfd838a84c38b42baee5e2ab5239":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c3b9f4a35a1741cdab1b8127376790be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f596ee340faa4691abdef6d010ff513c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6cc91e9e20d343679c6c32830b960faa":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"db916c8e786c40abb3db1432a9688e1d":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d7c93338fb5744a98060d36f29894737","IPY_MODEL_279fb85975df426a821e8f7e46c90f25","IPY_MODEL_786c8eb15f0c4f58b458338018aa8e49"],"layout":"IPY_MODEL_ecbd13d9937c463ba6b654348c05dde3"}},"0a1166c59f694b399f6c9bcbb1e6c89a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_6225593e71364eb181cff48c1cfcfcc2","style":"IPY_MODEL_e5b99552291e4649acf8760161e02ad9"}},"454c2074dba54875b5ee91c45e229169":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_270b1bb9c8d740fbb2efecaf2e1f9f9d","style":"IPY_MODEL_8bc2bd72d40d4224a5fff0f2bccdcbd3"}},"18acd101aa8647c39f5a7c247cedf365":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_4c48892283394169b0911d6922a97058","style":"IPY_MODEL_4fdc3a5116b54cb88adc45c257305421"}},"02ccf836a76444bd99fd508ed827e13a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:21734014375837:2
rec_idrec-1021-orgrec-1021-org
fname thomasthomas
lname georgegeorge
stNo 11
add1 mcmanus placemcmanus place
add2 north turramurranorth turramurra
city 31303130
state sasa
dob 1963022519630225
ssn 54605345460534
","layout":"IPY_MODEL_5423e9abb08d4175a8c593b60b35ad8d","style":"IPY_MODEL_d54363eed626420f910bfcfa01b2e420"}},"cc8a117379724417a5481bb9d17126b5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8684f0945a9048019a3165273fa674e6":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"feeb7fe2ee5a40e196cd16cfb2ae7635":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"fcd49a0c3a1342b1bb6473cf90c1b88b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_63e74252206d4c5db3c7a350096b0435","style":"IPY_MODEL_73bdd9f2969640ddba2a56ae39ceb6b7"}},"6722bf94601449c0a162116c1770e74b":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e7b43d6a420f46458c199aab46c9eb43":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7b6b2d02996344f3a8b829ce2ba14026","tooltips":[],"style":"IPY_MODEL_2a82f125b47641b983a65520897e61a9","icons":[]}},"261d645c4aa24c10ad9c02e75ee2d0b0":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"41e5e2f1dabe421d90c77a0af367cc74":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2a571eec79e4117b5c8dcc04d42ea8c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"efade4d483f24f349d3d478be973b355":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4ddf0fd6818343a58cee87bd452691eb":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_56a4135e67644d0a83f0612cfe92fea8","IPY_MODEL_e9d8900ddcf64682bbf5198fbf46f39d","IPY_MODEL_a16fae766e5c4828ac184a17e8da44f9"],"layout":"IPY_MODEL_721f29e0f7664888a2936a3ceddafb6d"}},"a8bf95eb6af447ee89f946a9b6b4f1a9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_02ccf836a76444bd99fd508
ed827e13a","IPY_MODEL_9bc94600605c4977ae1694a17888bd17","IPY_MODEL_d1ca7f2a677e4e2783d660faee4c4701"],"layout":"IPY_MODEL_937178220af4423daa2cd35aa8c3263a"}},"937178220af4423daa2cd35aa8c3263a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e3697e92e3e04c82b865bc3328dcad2b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e23cfe9a93804558acc75418021aa409","IPY_MODEL_482b6fc0521849dba90e938d82e68ed5","IPY_MODEL_854564d76efa4e17b66c5e86ac9b8783"],"layout":"IPY_MODEL_beea94f4506a4e83830588c4d4fcb1c7"}},"1320b18208d0404a8af38e1393051351":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2dc9896b314544f3bd71c32c625e1175":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"435029d048944a1d8bfd7f3af18ffeba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"026ce8c3d7e24f86adada904417924cf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_a78b5089adc74cd896d1e477251a4ac6","tooltips":[],"style":"IPY_MODEL_e2385f8daa6b4e8faecbc68192b40d14","icons":[]}},"0a3dc99ab26f42bf90522b4eabb0ad21":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_727805949ef54a7da481fe155bc77b47","tooltips":[],"style":"IPY_MODEL_7a93d4ae0e91471ab30ca90034d9f90c","icons":[]}},"7ec772d0ae8d4365bd39d4a4b8050837":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"786c8eb15f0c4f58b458338018aa8e49":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_32c982d5fd3545ff8e0bc9cbbe3dc90f","style":"IPY_MODEL_0203adb880ca48e1a6ead1b5af804670"}},"abea2c5d5ee14775a1e9c5a025bb83f2":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2f67e4e809494262b3752db712d75ce7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_18acd101aa8647c39f5a7c247cedf365","IPY_MODEL_4093238088364a1b934d6722c9468de8","IPY_MODEL_7d62968db1ae4f4c8d5e27028e99c6d3"],"layout":"IPY_MODEL_fb146a7c62e44aab94d15666c4afb50a"}},"a16fae766e5c4828ac184a17e8da44f9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_9b29c240e7114680978ecef578ce5fd9","style":"IPY_MODEL_fe94e56c365f4bd8afcf9a57eced058e"}},"e1567066674b498ca58437b558f4ee8e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:71734014375837:7
rec_idrec-1033-orgrec-1033-org
fname zacharyzachary
lname mccarthymccarthy
stNo 134134
add1 teal streetteal street
add2 greenwoodgreenwood
city 60246024
state wawa
dob 1986021919860219
ssn 32411023241102
","layout":"IPY_MODEL_c24d9d54deb84bbab0da6405aea82569","style":"IPY_MODEL_6722bf94601449c0a162116c1770e74b"}},"4c7afd0822eb4871b7708acbfb040fbf":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_64f8752992414e9aa3b677911f0d4848","IPY_MODEL_dacefcb9fc10425e80c5233cb0ba4ffd","IPY_MODEL_2757b91608934f0daa7d9f2397a65d8d"],"layout":"IPY_MODEL_514b19922da24f17bb39aa72d78beaf4"}},"9efc44bbb2af482989a69577c7b793d0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_0112614dd803438a986c77cfda539dba","tooltips":[],"style":"IPY_MODEL_825e88947fcc454498b4739c0757c97d","icons":[]}},"afac862e71a043c381874456054c5e41":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9bc94600605c4977ae1694a17888bd17":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_ae4bd3e8f34741e7b87423cdaf49a198","tooltips":[],"style":"IPY_MODEL_4be40990a33d4872871d58e52d09d898","icons":[]}},"e2385f8daa6b4e8faecbc68192b40d14":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"b47d111ecdf142a9bf96dea7cc00f12e":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_10fadcb3c1214044b997e0d2668bd9d3","IPY_MODEL_75ca0d3400af41f0a754c346a121c9b6","IPY_MODEL_91b4da3856884938987c6d2cf5751f9f"],"layout":"IPY_MODEL_8a0d5bc35d6746959993d76e767f4bc8"}},"b72e35612aa7407890a329608f3f0d49":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d2809335c95b4235b0ca86feab6b14d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_f3c9cd7b31a84fb4bd262c69b122e11d","tooltips":[],"style":"IPY_MODEL_8e9304290aab4a1fa38a89411af22922","icons":[]}},"44acc8fae0314cb7a33463d2bc6353e7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3a2907ac772b46ed81c079f41434c74b":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d0d57063e8b144b49970df32c53ce162":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_882d27a063a94986bc304b02c5222b7a","tooltips":[],"style":"IPY_MODEL_0d2c43c11f554f02b9b0e521a02df66f","icons":[]}},"085d7c0804ab4af6bb42b2928a6c2bd5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"98d458cfcd874e2c8af3998379e6c432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3bda20edce274aa7b1a92b98914530e1":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:31734014375837:3
rec_idrec-1022-dup-0rec-1022-dup-0
fname jacksonjackson
lname eglintoneglinton
stNo 840840
add1 fowles streetfowles street
add2 mountviewmountview
city 28032803
state sasa
dob 1983080719830807
ssn 29328372932837
","layout":"IPY_MODEL_181192c2388e4db190a751c4042e238a","style":"IPY_MODEL_eb072c0a62a24f03b150bc624aad5a5d"}},"fe6677ee651742e1abf26212230c71af":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"29bb51c1b4b842d7992d0c6be6e582c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7b6b2d02996344f3a8b829ce2ba14026":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"19ffca6433c14da198770adae02221be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17243a3f0b654e11970f9b5bce82f79c":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d3f5a5077c9b441e832429ae5a364fbc","IPY_MODEL_7661a6f07c404d3392d0834ebb51f2d5","IPY_MODEL_4cbbd9bb43ea4bcb82861e22c1478cf3"],"layout":"IPY_MODEL_1a16c51638774862acb327afd5a6f057"}},"b2130bed69ca4703acb121ebccd506ca":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2a82f125b47641b983a65520897e61a9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9b29c240e7114680978ecef578ce5fd9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5694a3ce6d8d4ae4b3022ded67aa7fd6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d3f5a5077c9b441e832429ae5a364fbc":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_085d7c0804ab4af6bb42b2928a6c2bd5","style":"IPY_MODEL_754c27d772534ecaaedab5591427ca09"}},"db63ca43d6934485987860bb1f441f29":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9f7543b4d79248bc8ecf6e9ce6bf31cf":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"1f1ae689a00642b597a76f6721a06432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0203adb880ca48e1a6ead1b5af804670":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4fdc3a5116b54cb88adc45c257305421":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5e173e9779fd4ca08143464fd42bdf62":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"214f3e7e895d4f54bbaa829b69ca8671":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"081d75be0414491faaccaec2648ddcd9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"514b19922da24f17bb39aa72d78beaf4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"595a260ac98d49e6894496961fa7701c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5250e70ff02e4d219de6502a27b84357":{"model_name":"HTMLStyleMode
l","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"fe94e56c365f4bd8afcf9a57eced058e":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"10fadcb3c1214044b997e0d2668bd9d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_805ed2cf73364f13addeaf13a8073620","style":"IPY_MODEL_115453304b8e477a96726060b0c509ad"}},"da34c9ff8e3b4738a59ec9eb0a39d2cb":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"aed62bd42df24b5788b0fa4f6e8fb610":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7f44c72c66414102acab1c2578025735":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"69c523dee7d54c3b8f0620ad2eb6dc51":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_0c96ba84dad84dbfb3b8347e9e7ae748","style":"IPY_MODEL_6020cfd838a84c38b42baee5e2ab5239"}},"25e1281b496a4a958955a4d9091ca382":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"727805949ef54a7da481fe155bc77b47":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b0d572405b3344278a443aa21138d927":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17f6fddf67e242588f39e2aaf0558678":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"91b4da3856884938987c6d2cf5751f9f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_318d9d146d1f41ee9a169043637dadb7","style":"IPY_MODEL_dad9c9e2d53744f4a2284917a78fd931"}},"7a93d4ae0e91471ab30ca90034d9f90c":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"0d2c43c11f554f02b9b0e521a02df66f":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"279fb85975df426a821e8f7e46c90f25":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_9e8426a14afa4c95bf89465efe99089f","tooltips":[],"style":"IPY_MODEL_47acc27c5bb047009eecaa7aa4974cac","icons":[]}},"f3c9cd7b31a84fb4bd262c69b122e11d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a6c854c673a54b54aa8f5894539a717c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c86d53a9d8394704aaa74e27d7569cc0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6542b2868c0c43359d500c3828ef12ef":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:121734014375837:12
rec_idrec-1029-dup-1rec-1029-dup-1
fname sachinsachin
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_3af6c6b8d18d48ca89cbc4f5299f6f72","style":"IPY_MODEL_e2a571eec79e4117b5c8dcc04d42ea8c"}},"dad9c9e2d53744f4a2284917a78fd931":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"708a2ae873f8426fade245382a8c9208":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_cd7680c5c7d54872b46d824dfd45b61f","IPY_MODEL_012518d9797f4087a352a23bf5ba2aaf","IPY_MODEL_4150bb26c66d4de4954e13af8d0cd781"],"layout":"IPY_MODEL_aed62bd42df24b5788b0fa4f6e8fb610"}},"ccbf1dffd785415594fd880aa5cc8edf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_01b2b8f50eb348cf9ee75f3145179cee","tooltips":[],"style":"IPY_MODEL_5e173e9779fd4ca08143464fd42bdf62","icons":[]}},"788b34a5563a423798cb54ff8d7b996c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"804f5f862a2547cc833f3f27c18d69de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_fc724d1ceb584472a158a91de7b17cae","IPY_MODEL_d2809335c95b4235b0ca86feab6b14d1","IPY_MODEL_23f62e8b7e2e4be1ae544202d2c1d38d"],"layout":"IPY_MODEL_714d113c8c894968a03f8521e9c6bdf7"}},"4be40990a33d4872871d58e52d09d898":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"4093238088364a1b934d6722c9468de8":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","N
o Match"],"button_style":"info","layout":"IPY_MODEL_17a7abd324054f039724fb423e2a67a4","tooltips":[],"style":"IPY_MODEL_afac862e71a043c381874456054c5e41","icons":[]}},"fb146a7c62e44aab94d15666c4afb50a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0c96ba84dad84dbfb3b8347e9e7ae748":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3308de4749240c6bcd404cb4caf7ee4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22483139248d470ca2edbb0b22a669d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_c86d53a9d8394704aaa74e27d7569cc0","tooltips":[],"style":"IPY_MODEL_77d77f14d7254453909994ace6b43eb5","icons":[]}},"270b1bb9c8d740fbb2efecaf2e1f9f9d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7af3659f738046f0a562d772fba7aadd":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"64f8752992414e9aa3b677911f0d4848":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:11734014375837:1
rec_idrec-1032-dup-0rec-1032-dup-0
fname brooklynbrooklyn
lname naar-caftenasnaar-caftenas
stNo 210210
add1 duffy streetduffy street
add2 tourist parktourist park
city 24812481
state nswnsw
dob 1984080219840802
ssn 36243043624304
","layout":"IPY_MODEL_6cc91e9e20d343679c6c32830b960faa","style":"IPY_MODEL_b345a2da49d84b559a59792c488d0c1f"}},"9e7440ae7f6844f3a8c084a8379df095":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"62d1842b557f49399311b9b573dac9d5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"47acc27c5bb047009eecaa7aa4974cac":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8bc2bd72d40d4224a5fff0f2bccdcbd3":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4abfebecf35e47b8bdab070a428d4a77":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"451cd21ac7b64517b93824dd5ab79460":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2757b91608934f0daa7d9f2397a65d8d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_2292728174764b0bb766d983d2d8f272","style":"IPY_MODEL_2266b285bd664631a0a6c9e89a35ed51"}},"b95905218e04479b8cba30790100004b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e1567066674b498ca58437b558f4ee8e","IPY_MODEL_8d8dc1ef9db8403dbe741141f95578e6","IPY_MODEL_083dbadeee3f4683a499f9b612768701"],"layout":"IPY_MODEL_435029d048944a1d8bfd7f3af18ffeba"}},"754c27d772534ecaaedab5591427ca09":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"26877fd9c74e49a999f8134e2d8a41d2":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_0a1166c59f694b399f6c9bcbb1e6c89a","IPY_MODEL_d0d57063e8b144b49970df32c53ce162","IPY_MODEL_b3ce0440576c4d22a90b74ecfddf9afb"],"layout":"IPY_MODEL_139af57eb88742fdaf311e40157b4c1b"}},"a78ca3ab571448c09c99720e6914c9a5":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_f6f566807665447d8947ef4f1c1cb802","style":"IPY_MODEL_081d75be0414491faaccaec2648ddcd9"}},"482b6fc0521849dba90e938d82e68ed5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b72e35612aa7407890a329608f3f0d49","tooltips":[],"style":"IPY_MODEL_f75d9074d0674656b77cb99efcbfe37d","icons":[]}},"2a7ce010e31c474d834773f51158ad6c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"32c982d5fd3545ff8e0bc9cbbe3dc90f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"dacefcb9fc10425e80c5233cb0ba4ffd":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_22aaffab00674834860abe4b7df78f36","tooltips":[],"style":"IPY_MODEL_3a2907ac772b46ed81c079f41434c74b","icons":[]}},"f5e420d27b5d4c92bc8380c01cfa2151":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_b2130bed69ca4703acb121ebccd506ca","style":"IPY_MODEL_942ce2043b974942801386f7fe813e59"}},"77d77f14d7254453909994ace6b43eb5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"f6f566807665447d8947ef4f1c1cb802":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ecbd13d9937c463ba6b654348c05dde3":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8a0d5bc35d6746959993d76e767f4bc8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"805ed2cf73364f13addeaf13a8073620":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22aaffab00674834860abe4b7df78f36":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc7bff94e2684f51b8ff148cdf04d0ff":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_39cadceacdbc4966a574c52a98c6260d","IPY_MODEL_17243a3f0b654e11970f9b5bce82f79c","IPY_MODEL_26877fd9c74e49a999f8134e2d8a41d2","IPY_MODEL_eedf22cb2361430099f8f6169cb418ea","IPY_MODEL_708a2ae873f8426fade245382a8c9208","IPY_MODEL_2f67e4e809494262b3752db712d75ce7","IPY_MODEL_db916c8e786c40abb3db1432a9688e1d"],"layout":"IPY_MODEL_214f3e7e895d4f54bbaa829b69ca8671"}},"9f688658e0a84aab86fb4b6e9b14eeb5":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_69c523dee7d54c3b8f0620ad2eb6dc51","IPY_MODEL_026ce8c3d7e24f86adada904417924cf","IPY_MODEL_5227aa6fa7c749238d811d462cb0fe36"],"layout":"IPY_MODEL_bd88f0c19aff4c1cb0bd3a5c52db200b"}},"d7ab081b539e42649eef86e6f7b6c76d":{"model_name":"LayoutModel","model_module":"@jupyter
-widgets/base","model_module_version":"2.0.0","state":{}},"b59772ab1d914a24bcb3a77947962f2c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"08b9883f77f148c0be1916fbe711a94f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7468229546d94bfcab6525edb9757637":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17a7abd324054f039724fb423e2a67a4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fbf9d80d166744d88c66208824d17c24":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c49cc29fbd04b46b38f410912a180d9","style":"IPY_MODEL_b27b76432a684b6980b5052cadfea618"}},"e0d2670f67e34eee81694ce7b7c97cd7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d54363eed626420f910bfcfa01b2e420":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"241d4546ce8b4f0684be34c8b75eb58f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2292728174764b0bb766d983d2d8f272":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"af7596b42e5c4b9da6a85846c55f2092":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 15 record pairs is a match or not

","layout":"IPY_MODEL_4abfebecf35e47b8bdab070a428d4a77","style":"IPY_MODEL_4402fa32ec2e4f12afbd61344d431bcc"}},"8d8dc1ef9db8403dbe741141f95578e6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b59772ab1d914a24bcb3a77947962f2c","tooltips":[],"style":"IPY_MODEL_8684f0945a9048019a3165273fa674e6","icons":[]}},"7d62968db1ae4f4c8d5e27028e99c6d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_78889cdf217643fa9f4d114f1918b2f6","style":"IPY_MODEL_261d645c4aa24c10ad9c02e75ee2d0b0"}},"c3fc421549e7425b815de2a3d01602d1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"825e88947fcc454498b4739c0757c97d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"882d27a063a94986bc304b02c5222b7a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ddcfc3d0e90741c0a6c0b67b47f6f53d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"55172f1685204f24a3b38debc635c6b9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_67d9530cacbf4bbe8144836c57e61acb","IPY_MODEL_e7b43d6a420f46458c199aab46c9eb43","IPY_MODEL_fbf9d80d166744d88c66208824d17c24"],"layout":"IPY_MODEL_19ffca6433c14da198770adae02221be"}},"73bdd9f2969640ddba2a56ae39ceb6b7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"40544637e23545a1a6fc511777301f2d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_7d018bb285e1499692cbb241516046f2","tooltips":[],"style":"IPY_MODEL_e2d942ea35174426aa46171c6348c308","icons":[]}},"c847d55d401e46bba108bca1bf8a7770":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1bad4094ead437cbc0eda8372c538a8":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"115453304b8e477a96726060b0c509ad":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"b27b76432a684b6980b5052cadfea618":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eb072c0a62a24f03b150bc624aad5a5d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"bd88f0c19aff4c1cb0bd3a5c52db200b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"139af57eb88742fdaf311e40157b4c1b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9e8426a14afa4c95bf89465efe99089f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"653d6750617f4c788c17ae743b0da13b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"abc4ad768b3d4f75b3f6f8e3d9d3350d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_98d458cfcd874e2c8af3998379e6c432","style":"IPY_MODEL_a7171853339643a48382ec125a26944d"}},"0096a2bb367e4410ab96be94878df836":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_454c2074dba54875b5ee91c45e229169","IPY_MODEL_0a3dc99ab26f42bf90522b4eabb0ad21","IPY_MODEL_d3bb974dd1f0490bb77dffaf8540d439"],"layout":"IPY_MODEL_7ad966747291400d9013a2a2e2b26e10"}},"0c49cc29fbd04b46b38f410912a180d9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4150bb26c66d4de4954e13af8d0cd781":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b3308de4749240c6bcd404cb4caf7ee4","style":"IPY_MODEL_595a260ac98d49e6894496961fa7701c"}},"181192c2388e4db190a751c4042e238a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ab4a49ee5cc4cd2bdc3a7b0cd066e29":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_7ef6892a4e7444458465dd5a5e76fae5","style":"IPY_MODEL_788b34a5563a423798cb54ff8d7b996c"}},"beea94f4506a4e83830588c4d4fcb1c7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a36bb933f92c4ada82504e4c10570057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2d942ea35174426aa46171c6348c308":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"7862a64b0ced43e8b70b7f5684987936":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3ce0440576c4d22a90b74ecfddf9afb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b0d572405b3344278a443aa21138d927","style":"IPY_MODEL_9fe8115b161a4a309887a31b449f2989"}},"7661a6f07c404d3392d0834ebb51f2d5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7af3659f738046f0a562d772fba7aadd","tooltips":[],"style":"IPY_MODEL_5306ed2302184ab8ba22c30999cb5572","icons":[]}},"fd4beb5f2be94c609aed0730b98b9fea":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_d7ab081b539e42649eef86e6f7b6c76d","tooltips":[],"style":"IPY_MODEL_9909b484567e49d3a2b619fec9e125b9","icons":[]}},"c24d9d54deb84bbab0da6405aea82569":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7d018bb285e1499692cbb241516046f2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"012518d9797f4087a352a23bf5ba2aaf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_653d6750617f4c788c17ae743b0da13b","tooltips":[],"style":"IPY_MODEL_1320b18208d0404a8af38e1393051351","icons":[]}},"75ca0d3400af41f0a754c346a121c9b6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_1e2bcb99927b4a8cb5c7dd4eaac39225","tooltips":[],"style":"IPY_MODEL_feeb7fe2ee5a40e196cd16cfb2ae7635","icons":[]}},"7a6c3a89abf64a438aa69a6d0e63782e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_2dc9896b314544f3bd71c32c625e1175","style":"IPY_MODEL_2a7ce010e31c474d834773f51158ad6c"}},"8b544a3eb42548698fec50307ca58cf0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_6ff19e3e507c4bebafd8a1bff6ce55c8","tooltips":[],"style":"IPY_MODEL_cc8a117379724417a5481bb9d17126b5","icons":[]}},"318d9d146d1f41ee9a169043637dadb7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cbbfcbe143644072846912c9d8f1c6d7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5227aa6fa7c749238d811d462cb0fe36":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_44acc8fae0314cb7a33463d2bc6353e7","style":"IPY_MODEL_451cd21ac7b64517b93824dd5ab79460"}},"c80f86a431824631b6626eba7c46fc33":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_17f6fddf67e242588f39e2aaf0558678","style":"IPY_MODEL_da34c9ff8e3b4738a59ec9eb0a39d2cb"}},"47e1703b3d45461f816b4ec1f8ea445a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9d57f12f444b47b58f6982290bc17ba2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b345a2da49d84b559a59792c488d0c1f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"04911938acd2486e8fc0ded740020ea1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ef6892a4e7444458465dd5a5e76fae5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5d8d51ddc216416cb12979d0f38aae5a":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_6542b2868c0c43359d500c3828ef12ef","IPY_MODEL_22483139248d470ca2edbb0b22a669d1","IPY_MODEL_c80f86a431824631b6626eba7c46fc33"],"layout":"IPY_MODEL_952a9f160893406791ec1975a5af971f"}},"4ebfc8728d2c4186a14ab0d9e52ca0c5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6ff19e3e507c4bebafd8a1bff6ce55c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1be32a9a51445f98e99e3b4a2c697bb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a7171853339643a48382ec125a26944d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}}}}},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.ti
meout":"2400000"}}},"dependencies":{"lakehouse":{"default_lakehouse":"36ef8bc2-c67a-4512-b060-e25489729c71","default_lakehouse_name":"data","default_lakehouse_workspace_id":"e803987a-98b6-445f-815c-3d15c2c46877","known_lakehouses":[{"id":"7e68da48-69ac-4253-b7bf-1f24863ab25a"},{"id":"1ca5fe82-c7a1-494d-825d-9168c65112d1"},{"id":"36ef8bc2-c67a-4512-b060-e25489729c71"}]},"environment":{"environmentId":"1ae2ef87-3a76-4cd3-90b5-e829f7a4ca9c","workspaceId":"e803987a-98b6-445f-815c-3d15c2c46877"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file From 23b0be513a96c0529524bbec7073bc53ea316d4d Mon Sep 17 00:00:00 2001 From: Arjun-Zingg Date: Fri, 13 Dec 2024 13:09:41 +0530 Subject: [PATCH 8/8] Add files via upload --- examples/fabric/ExampleNotebook.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/fabric/ExampleNotebook.ipynb diff --git a/examples/fabric/ExampleNotebook.ipynb b/examples/fabric/ExampleNotebook.ipynb new file mode 100644 index 00000000..e0007e1a --- /dev/null +++ b/examples/fabric/ExampleNotebook.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"code","source":["#abfss://Test@onelake.dfs.fabric.microsoft.com/ZinggData.Lakehouse/Files/data.csv\n","spark.sparkContext.setCheckpointDir(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":6,"statement_ids":[6],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.7727126Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:45.3551064Z","execution_finish_time":"2024-12-12T14:38:46.1554742Z","parent_msg_id":"0568e5f6-3102-476c-9119-1eea357e5f90"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 6, Finished, Available, 
Finished)"},"metadata":{}}],"execution_count":2,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"320825db-e1b4-4106-8f77-d974f59e6fe1"},{"cell_type":"code","source":["pip install zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":7,"statement_ids":[7],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:44.8919804Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:46.9779028Z","execution_finish_time":"2024-12-12T14:38:59.3086347Z","parent_msg_id":"9a6de53a-f5ed-4655-9341-4c4a7802ffe5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 7, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting zingg\n Downloading zingg-0.4.0-py2.py3-none-any.whl.metadata (933 bytes)\nCollecting py4j==0.10.9 (from zingg)\n Downloading py4j-0.10.9-py2.py3-none-any.whl.metadata (1.3 kB)\nDownloading zingg-0.4.0-py2.py3-none-any.whl (74.7 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.7/74.7 MB\u001b[0m \u001b[31m43.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.6/198.6 kB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: py4j, zingg\n Attempting uninstall: py4j\n Found existing installation: py4j 0.10.9.7\n Uninstalling py4j-0.10.9.7:\n Successfully uninstalled py4j-0.10.9.7\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\npyspark 3.5.1.5.4.20240407 requires py4j==0.10.9.7, but you have py4j 0.10.9 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed py4j-0.10.9 zingg-0.4.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":3,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d45194dd-f9fa-4522-9b8d-f68390a36cb0"},{"cell_type":"code","source":["spark.sparkContext.getCheckpointDir()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":8,"statement_ids":[8],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.0470709Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:59.8920089Z","execution_finish_time":"2024-12-12T14:39:00.1425377Z","parent_msg_id":"a7a3e48d-4f55-4dcc-94db-21864a32cdab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 8, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"'abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/b2adeefa-d873-4af7-9780-3af8598f5959'"},"metadata":{}}],"execution_count":4,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"735117dc-0f56-491b-a805-a16db331c90d"},{"cell_type":"code","source":["pip show 
zingg"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":9,"statement_ids":[9],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.2324828Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:00.6902784Z","execution_finish_time":"2024-12-12T14:39:04.2406337Z","parent_msg_id":"a041b135-c20d-4db9-9e2b-b8b4718c42dc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 9, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: zingg\r\nVersion: 0.4.0\r\nSummary: Zingg Entity Resolution, Data Mastering and Deduplication\r\nHome-page: https://github.com/zinggAI/zingg\r\nAuthor: Zingg.AI\r\nAuthor-email: sonalgoyal4@gmail.com\r\nLicense: https://github.com/zinggAI/zingg/blob/main/LICENSE\r\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\r\nRequires: py4j\r\nRequired-by: \r\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":5,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"51e5d94a-b1d6-47be-bbf1-98208af1b5d8"},{"cell_type":"code","source":["pip install 
tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":10,"statement_ids":[10],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.3970144Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:04.8223306Z","execution_finish_time":"2024-12-12T14:39:09.8213294Z","parent_msg_id":"c2bb18f4-faa5-4fc2-b94e-0ccd1e2b6af7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 10, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Collecting tabulate\n Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\nDownloading tabulate-0.9.0-py3-none-any.whl (35 kB)\nInstalling collected packages: tabulate\nSuccessfully installed tabulate-0.9.0\nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":6,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a2e77ae6-eeb2-482f-a47e-8c6ed0e7bb59"},{"cell_type":"code","source":["pip show tabulate"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":11,"statement_ids":[11],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.5376703Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:10.4269168Z","execution_finish_time":"2024-12-12T14:39:14.5511724Z","parent_msg_id":"0a38f00a-6e32-4871-aec1-99613a3180bd"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 11, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Name: 
tabulate\nVersion: 0.9.0\nSummary: Pretty-print tabular data\nHome-page: \nAuthor: \nAuthor-email: Sergey Astanin \nLicense: MIT\nLocation: /home/trusted-service-user/cluster-env/trident_env/lib/python3.11/site-packages\nRequires: \nRequired-by: \nNote: you may need to restart the kernel to use updated packages.\n"]}],"execution_count":7,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ed5c6ed3-40ef-4447-ab75-4a6a898814fe"},{"cell_type":"code","source":["##you can change these to the locations of your choice\n","##these are the only two settings that need to change\n","zinggDir = \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/models\"\n","modelId = \"testModelFebrl\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":12,"statement_ids":[12],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.6769995Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.1044655Z","execution_finish_time":"2024-12-12T14:39:15.354016Z","parent_msg_id":"7344a1f2-936d-4266-9e4f-bd76fd51601b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 12, Finished, Available, Finished)"},"metadata":{}}],"execution_count":8,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"c3b77184-4165-495e-b212-521dadef7125"},{"cell_type":"code","source":["## Define constants\n","MARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/marked/\"\n","UNMARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/unmarked/\"\n","\n","# Fill these with your specific details\n","storage_account = 
\"a1a73dc0-3894-4737-b38c-aa7fea437330\" # Replace with your storage account ID\n","fabric_url = \"dfs.fabric.microsoft.com\"\n","\n","# Updated paths for Microsoft Fabric\n","MARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{MARKED_DIR}\"\n","UNMARKED_DIR_DBFS = f\"abfss://{storage_account}@{fabric_url}{UNMARKED_DIR}\"\n","\n","## Import necessary libraries\n","import pandas as pd\n","import numpy as np\n","import os\n","import time\n","import uuid\n","from tabulate import tabulate\n","from ipywidgets import widgets, interact, GridspecLayout\n","import base64\n","import pyspark.sql.functions as fn\n","\n","# Import Azure libraries for Fabric\n","from azure.identity import DefaultAzureCredential\n","from azure.storage.filedatalake import DataLakeServiceClient\n","\n","# Zingg libraries\n","from zingg.client import *\n","from zingg.pipes import *\n","\n","# Setup Fabric authentication\n","def get_service_client():\n"," credential = DefaultAzureCredential()\n"," service_client = DataLakeServiceClient(\n"," account_url=f\"https://{storage_account}.dfs.fabric.microsoft.com\",\n"," credential=credential,\n"," )\n"," return service_client\n","\n","service_client = get_service_client()\n","\n","# Function to clean model directories in Fabric\n","def cleanModel():\n"," try:\n"," # Access the file system\n"," file_system_client = service_client.get_file_system_client(file_system=storage_account)\n"," \n"," # Remove marked directory\n"," if file_system_client.get_directory_client(MARKED_DIR).exists():\n"," file_system_client.get_directory_client(MARKED_DIR).delete_directory()\n"," \n"," # Remove unmarked directory\n"," if file_system_client.get_directory_client(UNMARKED_DIR).exists():\n"," file_system_client.get_directory_client(UNMARKED_DIR).delete_directory()\n"," \n"," print(\"Model cleaned successfully.\")\n"," except Exception as e:\n"," print(f\"Error cleaning model: {str(e)}\")\n"," return\n","\n","# Function to assign label to a candidate pair\n","def 
assign_label(candidate_pairs_pd, z_cluster, label):\n"," '''\n"," The purpose of this function is to assign a label to a candidate pair\n"," identified by its z_cluster value. Valid labels include:\n"," 0 - not matched\n"," 1 - matched\n"," 2 - uncertain\n"," '''\n"," # Assign label\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster'] == z_cluster, 'z_isMatch'] = label\n"," return\n","\n","# Function to count labeled pairs\n","def count_labeled_pairs(marked_pd):\n"," '''\n"," The purpose of this function is to count the labeled pairs in the marked folder.\n"," '''\n"," n_total = len(np.unique(marked_pd['z_cluster']))\n"," n_positive = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 1]['z_cluster']))\n"," n_negative = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 0]['z_cluster']))\n","\n"," return n_positive, n_negative, n_total\n","\n","# Setup interactive widget\n","available_labels = {\n"," 'No Match': 0,\n"," 'Match': 1,\n"," 'Uncertain': 2\n","}\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":13,"statement_ids":[13],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.7920676Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:15.9184099Z","execution_finish_time":"2024-12-12T14:39:16.7144224Z","parent_msg_id":"c47972cc-56fd-46a9-80fe-da0d20234a5d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 13, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/context.py:113: FutureWarning: Deprecated in 3.0.0. 
Use SparkSession.builder.getOrCreate() instead.\n"]}],"execution_count":9,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"fd229c4c-6376-4f4b-89c3-14f78822eef8"},{"cell_type":"code","source":["#build the arguments for zingg\n","args = Arguments()\n","# Set the modelid and the zingg dir. You can use this as is\n","args.setModelId(modelId)\n","args.setZinggDir(zinggDir)\n","print(args)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":14,"statement_ids":[14],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:45.916886Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:17.2999881Z","execution_finish_time":"2024-12-12T14:39:17.5431547Z","parent_msg_id":"c783d3fd-b7fa-4591-9771-32d42753ddd9"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 14, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\n"]}],"execution_count":10,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"f92fe414-811a-4e02-b11e-9711539d1786"},{"cell_type":"code","source":["# Import pandas\n","import pandas as pd\n","\n","# Define the schema (optional for validation)\n","schema = [\"id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", \"ssn\"]\n","\n","# Load the CSV file\n","data = pd.read_csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\")\n","\n","# Ensure column names match the schema\n","data.columns = schema # Adjust only if the file's column names differ\n","\n","# Display the 
data\n","data.head()\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":15,"statement_ids":[15],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.0524493Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:18.126005Z","execution_finish_time":"2024-12-12T14:39:19.6523511Z","parent_msg_id":"619a3f46-252d-4b59-849e-69081583ed29"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 15, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":37,"data":{"text/plain":" id fname lname stNo add1 add2 \\\n0 rec-1021-dup-0 thomas george 1 mcmanus place stoney creek \n1 rec-1021-org thomas george 1 mcmanus place north turramurra \n2 rec-1022-dup-0 jackson eglinton 840 fowles street mountview \n3 rec-1022-dup-1 jackson eglinton 840 fowles street moun tjiew \n4 rec-1022-dup-2 jackson eglinton 840 fowles street mou nview \n\n city state dob ssn \n0 3130 sa 19630225 5460534 \n1 3130 sa 19630225 5460534 \n2 2803 sa 19830807 2932837 \n3 2830 sa 19830807 2932837 \n4 2830 sa 19830807 2932837 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idfnamelnamestNoadd1add2citystatedobssn
0rec-1021-dup-0thomasgeorge1mcmanus placestoney creek3130sa196302255460534
1rec-1021-orgthomasgeorge1mcmanus placenorth turramurra3130sa196302255460534
2rec-1022-dup-0jacksoneglinton840fowles streetmountview2803sa198308072932837
3rec-1022-dup-1jacksoneglinton840fowles streetmoun tjiew2830sa198308072932837
4rec-1022-dup-2jacksoneglinton840fowles streetmou nview2830sa198308072932837
\n
"},"metadata":{}}],"execution_count":11,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"a76f4324-ff22-46e1-81b5-16f97ab2835d"},{"cell_type":"code","source":["schema = \"rec_id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, dob string, ssn string\"\n","inputPipe = CsvPipe(\"testFebrl\", \"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/data.csv\", schema)\n","\n","args.setData(inputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":16,"statement_ids":[16],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.2025787Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:20.2434395Z","execution_finish_time":"2024-12-12T14:39:20.4955338Z","parent_msg_id":"5c8d332f-c5a9-4782-8aa7-923604a75d86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 16, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["set schema \n"]}],"execution_count":12,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d9ed37ff-f408-4f87-bda0-161ad35946fb"},{"cell_type":"code","source":["#setting outputpipe in 'args'\n","outputPipe = CsvPipe(\"resultOutput\", 
\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files\")\n","args.setOutput(outputPipe)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":17,"statement_ids":[17],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.3319598Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.0521349Z","execution_finish_time":"2024-12-12T14:39:21.3077047Z","parent_msg_id":"edd9e63e-2f5a-41f8-aec9-be73e860542d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 17, Finished, Available, Finished)"},"metadata":{}}],"execution_count":13,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"3c49f24d-2f15-43e6-8c73-7b77c1199845"},{"cell_type":"code","source":["# Set field definitions\n","rec_id = FieldDefinition(\"rec_id\", \"string\", MatchType.EXACT) # ID should use exact match\n","fname = FieldDefinition(\"fname\", \"string\", MatchType.FUZZY) # First Name\n","lname = FieldDefinition(\"lname\", \"string\", MatchType.FUZZY) # Last Name\n","stNo = FieldDefinition(\"stNo\", \"string\", MatchType.FUZZY) # Street Number\n","add1 = FieldDefinition(\"add1\", \"string\", MatchType.FUZZY) # Address Line 1\n","add2 = FieldDefinition(\"add2\", \"string\", MatchType.FUZZY) # Address Line 2\n","city = FieldDefinition(\"city\", \"string\", MatchType.FUZZY) # City\n","state = FieldDefinition(\"state\", \"string\", MatchType.FUZZY) # State\n","dob = FieldDefinition(\"dob\", \"string\", MatchType.EXACT) # Date of Birth (prefer exact match)\n","ssn = FieldDefinition(\"ssn\", \"string\", MatchType.EXACT) # SSN (should use exact match)\n","\n","# Create the field definitions list\n","fieldDefs = [rec_id, fname, lname, stNo, add1, 
add2, city, state, dob, ssn]\n","\n","# Set field definitions in args\n","args.setFieldDefinition(fieldDefs)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":18,"statement_ids":[18],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.4720722Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:21.8641221Z","execution_finish_time":"2024-12-12T14:39:22.1346071Z","parent_msg_id":"71227dea-6926-4e14-9e66-501b8515fa5a"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 18, Finished, Available, Finished)"},"metadata":{}}],"execution_count":14,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"76edaab7-d705-4d05-adaa-298b48f87ae6"},{"cell_type":"code","source":["# The numPartitions define how data is split across the cluster. 
\n","# Please change the fllowing as per your data and cluster size by referring to the docs.\n","\n","args.setNumPartitions(4)\n","args.setLabelDataSampleSize(0.5)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":19,"statement_ids":[19],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.5771016Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:22.6870105Z","execution_finish_time":"2024-12-12T14:39:23.1094802Z","parent_msg_id":"133bf47a-3e2c-4a69-b874-b68bd3fd0f94"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 19, Finished, Available, Finished)"},"metadata":{}}],"execution_count":15,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ea3a596e-0571-4149-9b5b-d8357226d90c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"findTrainingData\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","print(args)\n","print(options)\n","print(zingg)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":20,"statement_ids":[20],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.7720589Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:23.6806377Z","execution_finish_time":"2024-12-12T14:39:40.4666332Z","parent_msg_id":"88db0a89-5777-4e74-92c3-15e9a461056f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 20, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'findTrainingData']\narguments for client options are ['--phase', 'findTrainingData', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n\n\n\n"]}],"execution_count":16,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"92238689-3e1c-4b32-9802-c59c714aa6d2"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"label\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.init()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":21,"statement_ids":[21],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:46.8921439Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.0118438Z","execution_finish_time":"2024-12-12T14:39:41.2588634Z","parent_msg_id":"9f835445-3575-444e-be68-698c87047cfa"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 21, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'label']\narguments for client options are ['--phase', 'label', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":17,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"b30911c2-9663-4260-8952-c9e5e0d668ea"},{"cell_type":"code","source":["# get candidate pairs\n","candidate_pairs_pd = getPandasDfFromDs(zingg.getUnmarkedRecords())\n"," \n","# if no candidate pairs, run job and 
wait\n","if candidate_pairs_pd.shape[0] == 0:\n"," print('No unlabeled candidate pairs found. Run findTraining job ...')\n","\n","else:\n"," # get list of pairs (as identified by z_cluster) to label \n"," z_clusters = list(np.unique(candidate_pairs_pd['z_cluster'])) \n","\n"," # identify last reviewed cluster\n"," last_z_cluster = '' # none yet\n","\n"," # print candidate pair stats\n"," print('{0} candidate pairs found for labeling'.format(len(z_clusters)))"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":22,"statement_ids":[22],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.1173535Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:41.8216531Z","execution_finish_time":"2024-12-12T14:39:44.3102558Z","parent_msg_id":"6d386eec-27ed-4ac8-8c59-e45bcfa62cc5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 22, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["15 candidate pairs found for labeling\n"]}],"execution_count":18,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e303305a-e747-4807-a788-beecde020545"},{"cell_type":"code","source":["# Label Training Set\n","\n","# define variable to avoid duplicate saves\n","ready_for_save = False\n","print(candidate_pairs_pd)\n","\n","# user-friendly labels and corresponding zingg numerical value\n","# (the order in the dictionary affects how displayed below)\n","LABELS = {\n"," 'Uncertain':2,\n"," 'Match':1,\n"," 'No Match':0 \n"," }\n","\n","# GET CANDIDATE PAIRS\n","# ========================================================\n","#candidate_pairs_pd = get_candidate_pairs()\n","n_pairs = 
int(candidate_pairs_pd.shape[0]/2)\n","# ========================================================\n","\n","# DEFINE IPYWIDGET DISPLAY\n","# ========================================================\n","display_pd = candidate_pairs_pd.drop(\n"," labels=[\n"," 'z_zid', 'z_prediction', 'z_score', 'z_isMatch', 'z_zsource'\n"," ], \n"," axis=1)\n","\n","# define header to be used with each displayed pair\n","html_prefix = \"

\"\n","html_suffix = \"

\"\n","header = widgets.HTML(value=f\"{html_prefix}\" + \"
\".join([str(i)+\"  \" for i in display_pd.columns.to_list()]) + f\"
{html_suffix}\")\n","\n","# initialize display\n","vContainers = []\n","vContainers.append(widgets.HTML(value=f'

Indicate if each of the {n_pairs} record pairs is a match or not

'))\n","\n","# for each set of pairs\n","for n in range(n_pairs):\n","\n"," # get candidate records\n"," candidate_left = display_pd.loc[2*n].to_list()\n"," print(candidate_left)\n"," candidate_right = display_pd.loc[(2*n)+1].to_list()\n"," print(candidate_right)\n","\n"," # define grid to hold values\n"," html = ''\n","\n"," for i in range(display_pd.shape[1]):\n","\n"," # get column name\n"," column_name = display_pd.columns[i]\n","\n"," # if field is image\n"," if column_name == 'image_path':\n","\n"," # define row header\n"," html += ''\n"," html += 'image'\n","\n"," # read left image to encoded string\n"," l_endcode = ''\n"," if candidate_left[i] != '':\n"," with open(candidate_left[i], \"rb\") as l_file:\n"," l_encode = base64.b64encode( l_file.read() ).decode()\n","\n"," # read right image to encoded string\n"," r_encode = ''\n"," if candidate_right[i] != '':\n"," with open(candidate_right[i], \"rb\") as r_file:\n"," r_encode = base64.b64encode( r_file.read() ).decode() \n","\n"," # present images\n"," html += f''\n"," html += f''\n"," html += ''\n","\n"," elif column_name != 'image_path': # display text values\n","\n"," if column_name == 'z_cluster': z_cluster = candidate_left[i]\n","\n"," html += ''\n"," html += f'{column_name}'\n"," html += f'{str(candidate_left[i])}'\n"," html += f'{str(candidate_right[i])}'\n"," html += ''\n","\n"," # insert data table\n"," table = widgets.HTML(value=f''+html+'
')\n"," z_cluster = None\n","\n"," # assign label options to pair\n"," label = widgets.ToggleButtons(\n"," options=LABELS.keys(), \n"," button_style='info'\n"," )\n","\n"," # define blank line between displayed pair and next\n"," blankLine=widgets.HTML(value='
')\n","\n"," # append pair, label and blank line to widget structure\n"," vContainers.append(widgets.VBox(children=[table, label, blankLine]))\n","\n","# present widget\n","display(widgets.VBox(children=vContainers))\n","# ========================================================\n","\n","# mark flag to allow save \n","ready_for_save = True\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":23,"statement_ids":[23],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:38:47.2971586Z","session_start_time":null,"execution_start_time":"2024-12-12T14:39:44.8516182Z","execution_finish_time":"2024-12-12T14:39:45.7453958Z","parent_msg_id":"f4eac308-98ad-4ac2-b881-a6f991545aca"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 23, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":[" z_zid z_cluster z_prediction z_score z_isMatch rec_id \\\n0 34 1734014375837:0 -1.0 0.0 -1 rec-1022-dup-1 \n1 17 1734014375837:0 -1.0 0.0 -1 rec-1029-dup-1 \n2 56 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n3 26 1734014375837:1 -1.0 0.0 -1 rec-1032-dup-0 \n4 47 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n5 17 1734014375837:12 -1.0 0.0 -1 rec-1029-dup-1 \n6 59 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n7 29 1734014375837:16 -1.0 0.0 -1 rec-1034-org \n8 32 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n9 2 1734014375837:2 -1.0 0.0 -1 rec-1021-org \n10 33 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n11 3 1734014375837:3 -1.0 0.0 -1 rec-1022-dup-0 \n12 41 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n13 11 1734014375837:4 -1.0 0.0 -1 rec-1026-dup-0 \n14 57 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n15 27 1734014375837:7 -1.0 0.0 -1 rec-1033-org \n16 47 1734014375837:8 -1.0 0.0 -1 rec-1029-dup-1 \n17 34 1734014375837:8 -1.0 0.0 -1 rec-1022-dup-1 \n18 46 
1734007288465:0 -1.0 0.0 -1 rec-1029-dup-0 \n19 24 1734007288465:0 -1.0 0.0 -1 rec-1031-dup-0 \n20 48 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n21 18 1734007288465:1 -1.0 0.0 -1 rec-1029-dup-2 \n22 24 1734007288465:12 -1.0 0.0 -1 rec-1031-dup-0 \n23 1 1734007288465:12 -1.0 0.0 -1 rec-1021-dup-0 \n24 37 1734007288465:3 -1.0 0.0 -1 rec-1022-dup-4 \n25 20 1734007288465:3 -1.0 0.0 -1 rec-1029-dup-4 \n26 53 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n27 23 1734007288465:4 -1.0 0.0 -1 rec-1031-org \n28 46 1734007288465:8 -1.0 0.0 -1 rec-1029-dup-0 \n29 1 1734007288465:8 -1.0 0.0 -1 rec-1021-dup-0 \n\n fname lname stNo add1 add2 \\\n0 jackson eglinton 840 fowles street moun tjiew \n1 sachin stephenson 81 rose scott circuit cordoba manor \n2 brooklyn naar-caftenas 210 duffy street tourist park \n3 brooklyn naar-caftenas 210 duffy street tourist park \n4 sachin stephenson 81 rose scott circuit cordoba manor \n5 sachin stephenson 81 rose scott circuit cordoba manor \n6 jasmine chang 210 magnolia drive sunset valley \n7 jasmine chang 210 magnolia drive sunset valley \n8 thomas george 1 mcmanus place north turramurra \n9 thomas george 1 mcmanus place north turramurra \n10 jackson eglinton 840 fowles street mountview \n11 jackson eglinton 840 fowles street mountview \n12 xani green 2 phill ip avenue abbey green \n13 xani green 2 phill ip avenue abbey green \n14 zachary mccarthy 134 teal street greenwood \n15 zachary mccarthy 134 teal street greenwood \n16 sachin stephenson 81 rose scott circuit cordoba manor \n17 jackson eglinton 840 fowles street moun tjiew \n18 kylee stephenson 81 rose scott circuit cordoba anor \n19 samantha sabieray 68 quandong street wattle brae \n20 annalise stephenson 81 rose scott circuit cordoba manor \n21 annalise stephenson 81 rose scott circuit cordoba manor \n22 samantha sabieray 68 quandong street wattle brae \n23 thomas george 1 mcmanus place stoney creek \n24 jackson eglinton 840 fowles street mountv iew \n25 kylee stephenson 81 rose scott 
circuit cordoba manor \n26 emma crossman 53 mcdowall place kellhaven \n27 emma crossman 53 mcdowall place kellhaven \n28 kylee stephenson 81 rose scott circuit cordoba anor \n29 thomas george 1 mcmanus place stoney creek \n\n city state dob ssn z_zsource \n0 2830 sa 19830807 2932837 testFebrl \n1 4226 vic 19461101 4783085 testFebrl \n2 2481 nsw 19840802 3624304 testFebrl \n3 2481 nsw 19840802 3624304 testFebrl \n4 4226 vic 19461101 4783085 testFebrl \n5 4226 vic 19461101 4783085 testFebrl \n6 3021 vic 19930203 4562381 testFebrl \n7 3021 vic 19930203 4562381 testFebrl \n8 3130 sa 19630225 5460534 testFebrl \n9 3130 sa 19630225 5460534 testFebrl \n10 2803 sa 19830807 2932837 testFebrl \n11 2803 sa 19830807 2932837 testFebrl \n12 5108 nsw 19390410 9201057 testFebrl \n13 5108 nsw 19390410 9201057 testFebrl \n14 6024 wa 19860219 3241102 testFebrl \n15 6024 wa 19860219 3241102 testFebrl \n16 4226 vic 19461101 4783085 testFebrl \n17 2830 sa 19830807 2932837 testFebrl \n18 4226 vic 19461101 4783085 testFebrl \n19 4019 wa 19590807 2863290 testFebrl \n20 4226 vic 19461101 4783085 testFebrl \n21 4226 vic 19461101 4783085 testFebrl \n22 4019 wa 19590807 2863290 testFebrl \n23 3130 sa 19630225 5460534 testFebrl \n24 2830 sa 19830807 2932837 testFebrl \n25 4226 vic 19461101 4783085 testFebrl \n26 5608 vic 19391027 3561186 testFebrl \n27 5608 vic 19391027 3561186 testFebrl \n28 4226 vic 19461101 4783085 testFebrl \n29 3130 sa 19630225 5460534 testFebrl \n['1734014375837:0', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734014375837:0', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:1', 'rec-1032-dup-0', ' brooklyn', ' naar-caftenas', ' 210', ' duffy street', ' tourist park', ' 2481', ' nsw', ' 19840802', ' 3624304']\n['1734014375837:1', 'rec-1032-dup-0', 'brooklyn', 'naar-caftenas', '210', 'duffy street', 
'tourist park', '2481', 'nsw', '19840802', '3624304']\n['1734014375837:12', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:12', 'rec-1029-dup-1', 'sachin', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734014375837:16', 'rec-1034-org', ' jasmine', ' chang', ' 210', ' magnolia drive', ' sunset valley', ' 3021', ' vic', ' 19930203', ' 4562381']\n['1734014375837:16', 'rec-1034-org', 'jasmine', 'chang', '210', 'magnolia drive', 'sunset valley', '3021', 'vic', '19930203', '4562381']\n['1734014375837:2', 'rec-1021-org', ' thomas', ' george', ' 1', ' mcmanus place', ' north turramurra', ' 3130', ' sa', ' 19630225', ' 5460534']\n['1734014375837:2', 'rec-1021-org', 'thomas', 'george', '1', 'mcmanus place', 'north turramurra', '3130', 'sa', '19630225', '5460534']\n['1734014375837:3', 'rec-1022-dup-0', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountview', ' 2803', ' sa', ' 19830807', ' 2932837']\n['1734014375837:3', 'rec-1022-dup-0', 'jackson', 'eglinton', '840', 'fowles street', 'mountview', '2803', 'sa', '19830807', '2932837']\n['1734014375837:4', 'rec-1026-dup-0', ' xani', ' green', ' 2', ' phill ip avenue', ' abbey green', ' 5108', ' nsw', ' 19390410', ' 9201057']\n['1734014375837:4', 'rec-1026-dup-0', 'xani', 'green', '2', 'phill ip avenue', 'abbey green', '5108', 'nsw', '19390410', '9201057']\n['1734014375837:7', 'rec-1033-org', ' zachary', ' mccarthy', ' 134', ' teal street', ' greenwood', ' 6024', ' wa', ' 19860219', ' 3241102']\n['1734014375837:7', 'rec-1033-org', 'zachary', 'mccarthy', '134', 'teal street', 'greenwood', '6024', 'wa', '19860219', '3241102']\n['1734014375837:8', 'rec-1029-dup-1', ' sachin', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734014375837:8', 'rec-1022-dup-1', ' jackson', ' eglinton', ' 840', ' fowles street', ' 
moun tjiew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:0', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:0', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:1', 'rec-1029-dup-2', ' annalise', ' stephenson', ' 81', ' rose scott circuit', ' cordoba manor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:1', 'rec-1029-dup-2', 'annalise', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:12', 'rec-1031-dup-0', 'samantha', 'sabieray', '68', 'quandong street', 'wattle brae', '4019', 'wa', '19590807', '2863290']\n['1734007288465:12', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n['1734007288465:3', 'rec-1022-dup-4', ' jackson', ' eglinton', ' 840', ' fowles street', ' mountv iew', ' 2830', ' sa', ' 19830807', ' 2932837']\n['1734007288465:3', 'rec-1029-dup-4', 'kylee', 'stephenson', '81', 'rose scott circuit', 'cordoba manor', '4226', 'vic', '19461101', '4783085']\n['1734007288465:4', 'rec-1031-org', ' emma', ' crossman', ' 53', ' mcdowall place', ' kellhaven', ' 5608', ' vic', ' 19391027', ' 3561186']\n['1734007288465:4', 'rec-1031-org', 'emma', 'crossman', '53', 'mcdowall place', 'kellhaven', '5608', 'vic', '19391027', '3561186']\n['1734007288465:8', 'rec-1029-dup-0', ' kylee', ' stephenson', ' 81', ' rose scott circuit', ' cordoba anor', ' 4226', ' vic', ' 19461101', ' 4783085']\n['1734007288465:8', 'rec-1021-dup-0', 'thomas', 'george', '1', 'mcmanus place', 'stoney creek', '3130', 'sa', '19630225', '5460534']\n"]},{"output_type":"display_data","data":{"text/plain":"VBox(children=(HTML(value='

Indicate if each of the 15 record pairs is a match or not

'), VBox(chil…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"01ee458406bc4bc7aae55eb99c0b504b"}},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":24,"statement_ids":[24],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:07.0951338Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:07.7673389Z","execution_finish_time":"2024-12-12T14:40:08.7466527Z","parent_msg_id":"bdc81fed-0318-4c1e-9a05-c19863f74f86"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 24, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":25,"statement_ids":[25],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:11.2518685Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:11.8231998Z","execution_finish_time":"2024-12-12T14:40:12.0645572Z","parent_msg_id":"875bd6d4-812c-4287-89ec-65b08d5b15f7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 25, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":26,"statement_ids":[26],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:18.2988145Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:18.8789311Z","execution_finish_time":"2024-12-12T14:40:19.1201871Z","parent_msg_id":"5db081fe-5e88-4519-a2c6-fcc370fbfafc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 
26, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":27,"statement_ids":[27],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:42.2210094Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:42.7984267Z","execution_finish_time":"2024-12-12T14:40:43.0525888Z","parent_msg_id":"048f0931-0eaf-4be3-ae1f-cbd4c06d2e9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 27, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":28,"statement_ids":[28],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:43.7678985Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:44.3138165Z","execution_finish_time":"2024-12-12T14:40:44.5580052Z","parent_msg_id":"462f3847-e026-4744-9b81-4435f1c8ad9c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 28, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":29,"statement_ids":[29],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:40:55.8774777Z","session_start_time":null,"execution_start_time":"2024-12-12T14:40:56.4326849Z","execution_finish_time":"2024-12-12T14:40:56.7235357Z","parent_msg_id":"16b1eb37-22d6-440f-85ff-57c744336e9f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 29, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":30,"statement_ids":[30],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:03.1431734Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:03.6780666Z","execution_finish_time":"2024-12-12T14:41:03.9184142Z","parent_msg_id":"08566780-4456-4005-be13-646d0df8ca23"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 30, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":31,"statement_ids":[31],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:12.9413749Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:13.5109925Z","execution_finish_time":"2024-12-12T14:41:13.7677758Z","parent_msg_id":"37011b0e-d098-4aa2-b74b-9f7ed8e5092f"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 31, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":32,"statement_ids":[32],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:23.0819227Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:23.7271973Z","execution_finish_time":"2024-12-12T14:41:23.9748964Z","parent_msg_id":"00b11703-7206-4822-8eeb-ea326f892b1e"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 32, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":33,"statement_ids":[33],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:31.7381977Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:32.2866112Z","execution_finish_time":"2024-12-12T14:41:32.5342842Z","parent_msg_id":"65cbb945-0a65-4942-bfaa-233cbc4641ee"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 33, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":34,"statement_ids":[34],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:39.941469Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:40.5983996Z","execution_finish_time":"2024-12-12T14:41:40.848122Z","parent_msg_id":"0f447c56-a165-436a-b7a1-7d5096f3f966"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 34, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":35,"statement_ids":[35],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:41:51.2539429Z","session_start_time":null,"execution_start_time":"2024-12-12T14:41:51.8238466Z","execution_finish_time":"2024-12-12T14:41:52.075655Z","parent_msg_id":"09ec44eb-26ef-4d82-b198-22ab624c9ecc"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 35, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":36,"statement_ids":[36],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:02.26967Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:02.8636434Z","execution_finish_time":"2024-12-12T14:42:03.1209762Z","parent_msg_id":"d701ef7e-6c03-4f6f-bccc-3d1dd11d246c"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 36, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":37,"statement_ids":[37],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:11.285235Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:11.8311926Z","execution_finish_time":"2024-12-12T14:42:12.0650602Z","parent_msg_id":"d3820343-a606-479d-bcfe-9c1da6f2a104"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 37, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":38,"statement_ids":[38],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:20.7858335Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:21.3273077Z","execution_finish_time":"2024-12-12T14:42:21.6218612Z","parent_msg_id":"744f8a1d-0658-4fe8-ba1a-c225cb1f2bf7"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 38, Finished, Available, 
Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":39,"statement_ids":[39],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:30.8794009Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:31.4177187Z","execution_finish_time":"2024-12-12T14:42:31.6735656Z","parent_msg_id":"34e08c99-8c30-4af2-8fae-fe81e0f51e1b"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 39, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":40,"statement_ids":[40],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:42:41.3482104Z","session_start_time":null,"execution_start_time":"2024-12-12T14:42:41.8980878Z","execution_finish_time":"2024-12-12T14:42:42.1374491Z","parent_msg_id":"3daf28a4-fbc8-4efd-a361-7cb4a2d489b4"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 40, Finished, Available, 
Finished)"},"metadata":{}}],"execution_count":19,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"2fbe3b6c-9a71-4c3f-8cd6-af6eedad956c"},{"cell_type":"code","source":["notebookutils.fs.ls(\"/\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":5,"statement_ids":[5],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:37:55.2180433Z","session_start_time":null,"execution_start_time":"2024-12-12T14:38:05.3684078Z","execution_finish_time":"2024-12-12T14:38:08.0399328Z","parent_msg_id":"340db6fd-15b9-49e4-b8d4-124a4cc2f05d"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 5, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":7,"data":{"text/plain":"[FileInfo(path=abfss://e803987a-98b6-445f-815c-3d15c2c46877@onelake.dfs.fabric.microsoft.com/36ef8bc2-c67a-4512-b060-e25489729c71, name=36ef8bc2-c67a-4512-b060-e25489729c71, size=0)]"},"metadata":{}}],"execution_count":1,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"77417f1d-c2a6-4160-9b9c-12b0fbee5839"},{"cell_type":"code","source":["if not ready_for_save:\n"," print('No labels have been assigned. 
Run the previous cell to create candidate pairs and assign labels to them before re-running this cell.')\n","\n","else:\n","\n"," # ASSIGN LABEL VALUE TO CANDIDATE PAIRS IN DATAFRAME\n"," # ========================================================\n"," # for each pair in displayed widget\n"," for pair in vContainers[1:]:\n","\n"," # get pair and assigned label\n"," html_content = pair.children[1].get_interact_value() # the displayed pair as html\n"," user_assigned_label = pair.children[1].get_interact_value() # the assigned label\n","\n"," # extract candidate pair id from html pair content\n"," start = pair.children[0].value.find('data-title=\"')\n"," if start > 0: \n"," start += len('data-title=\"') \n"," end = pair.children[0].value.find('\"', start+2)\n"," pair_id = pair.children[0].value[start:end]\n","\n","\n","\n"," # assign label to candidate pair entry in dataframe\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster']==pair_id, 'z_isMatch'] = LABELS.get(user_assigned_label)\n"," # ========================================================\n","\n"," # SAVE LABELED DATA TO ZINGG FOLDER\n"," # ========================================================\n"," # make target directory if needed\n"," notebookutils.fs.mkdirs(MARKED_DIR)\n"," \n"," # save label assignments\n"," # save labels\n"," zingg.writeLabelledOutputFromPandas(candidate_pairs_pd,args)\n","\n"," # count labels accumulated\n"," marked_pd_df = getPandasDfFromDs(zingg.getMarkedRecords())\n"," n_pos, n_neg, n_tot = count_labeled_pairs(marked_pd_df)\n"," print(f'You have accumulated {n_pos} pairs labeled as positive matches.')\n"," print(f'You have accumulated {n_neg} pairs labeled as not matches.')\n"," print(\"If you need more pairs to label, re-run the cell for 'findTrainingData'\")\n"," # ======================================================== \n","\n"," # save completed\n"," ready_for_save = 
False"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":41,"statement_ids":[41],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:43:16.772682Z","session_start_time":null,"execution_start_time":"2024-12-12T14:43:17.381583Z","execution_finish_time":"2024-12-12T14:43:31.9046383Z","parent_msg_id":"ed09275a-e109-4cb1-802d-3909c879a2ad"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 41, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/opt/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py:147: UserWarning: DataFrame constructor is internal. Do not directly use it.\n warnings.warn(\"DataFrame constructor is internal. Do not directly use it.\")\n"]},{"output_type":"stream","name":"stdout","text":["You have accumulated 9 pairs labeled as positive matches.\nYou have accumulated 6 pairs labeled as not matches.\nIf you need more pairs to label, re-run the cell for 'findTrainingData'\n"]}],"execution_count":20,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9795bb7f-cd3e-41c5-98fd-6341129df8e3"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"trainMatch\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, 
options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":42,"statement_ids":[42],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T14:49:47.2575582Z","session_start_time":null,"execution_start_time":"2024-12-12T14:49:47.8553896Z","execution_finish_time":"2024-12-12T14:51:37.5141836Z","parent_msg_id":"f77d784e-0276-440c-8113-c6d060096abf"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 42, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'trainMatch']\narguments for client options are ['--phase', 'trainMatch', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":21,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"71928547-bc82-4653-960f-6c376524f651"},{"cell_type":"code","source":["outputDF = spark.read.csv(\"abfss://Zingg@onelake.dfs.fabric.microsoft.com/data.Lakehouse/Files/part-00000-d624fac4-b80c-4f8d-aebc-5d5faf351b8f-c000.csv\")\n","\n","colNames = [\"z_minScore\", \"z_maxScore\", \"z_cluster\", \"rec_id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"state\", \"dob\", 
\"ssn\"]\n","outputDF.toDF(*colNames).show(100)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":47,"statement_ids":[47],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:05:16.9588841Z","session_start_time":null,"execution_start_time":"2024-12-12T15:05:17.7549538Z","execution_finish_time":"2024-12-12T15:05:19.4042746Z","parent_msg_id":"f45225e4-62b8-4836-b7d8-bf0d91575730"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 47, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n| z_minScore| z_maxScore|z_cluster| rec_id| fname| lname|stNo| add1| add2|city|state| dob| ssn|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n|0.9999999999995524|0.9999999999995524| 26|rec-1032-dup-0|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999995358|0.9999999999995358| 24|rec-1031-dup-0|samantha| sabieray| 68| quandong street| wattle brae|4019| wa|19590807|2863290|\n|0.9999999977273273|0.9999999977273273| 2| rec-1021-org| thomas| george| 1| mcmanus place|north turramurra|3130| sa|19630225|5460534|\n|0.9999999999997746|0.9999999999997746| 15| rec-1028-org|eglinton| NULL| 24| curriecrescent| woorniyan|3749| qld|19180205|9341716|\n|0.9999999999991117|0.9999999999991117| 18|rec-1029-dup-2|annalise| stephenson| 81|rose scott circuit| cordoba manor|4226| vic|19461101|4783085|\n|0.9999999999991869|0.9999999999991869| 29| rec-1034-org| jasmine| chang| 210| magnolia drive| sunset valley|3021| 
vic|19930203|4562381|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999988902|0.9999999999988902| 3|rec-1022-dup-0| jackson| eglinton| 840| fowles street| mountview|2803| sa|19830807|2932837|\n|0.9999999999994619|0.9999999999994619| 19|rec-1029-dup-3| kylee| turale| 81| cordoba manor| ashfield|4226| vic|19461101|4783085|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999999976269|0.9999999999976269| 4|rec-1022-dup-1| jackson| eglinton| 840| fowles street| moun tjiew|2830| sa|19830807|2932837|\n|0.9999999969422861|0.9999999969422861| 1|rec-1021-dup-0| thomas| george| 1| mcmanus place| stoney creek|3130| sa|19630225|5460534|\n|0.9999999999990814|0.9999999999990814| 8| rec-1023-org| gianni| matson| 701| willis street| boonoobloo|3101| vic|19410111|2540080|\n|0.9999999969610703|0.9999999969610703| 12|rec-1026-dup-1| xani| green| 2| phillip avenue| armidale|5108| nsw|19390410|9201057|\n|0.9999999999994932|0.9999999999994932| 23| rec-1031-org| emma| crossman| 53| mcdowall place| kellhaven|5608| vic|19391027|3561186|\n|0.9999999999995524|0.9999999999995524| 25| rec-1032-org|brooklyn|naar-caftenas| 210| duffy street| tourist park|2481| nsw|19840802|3624304|\n|0.9999999999973147|0.9999999999973147| 5|rec-1022-dup-2| jackson| eglinton| 840| fowles street| mou nview|2830| sa|19830807|2932837|\n|0.9999999999991869|0.9999999999991869| 28|rec-1034-dup-0| jasmine| chang| 210| magnolia drive| sunset valley|3021| vic|19930203|4562381|\n|0.9999999988648708|0.9999999988648708| 0| rec-1020-org| blake| ryan| 4| starling place| berkeley vlge|5412| 
nsw|19271027|2402765|\n+------------------+------------------+---------+--------------+--------+-------------+----+------------------+----------------+----+-----+--------+-------+\n\n"]}],"execution_count":26,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"383bac89-e461-431f-ba14-5ab59941942c"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"generateDocs\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":48,"statement_ids":[48],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:06:42.854029Z","session_start_time":null,"execution_start_time":"2024-12-12T15:06:43.5186144Z","execution_finish_time":"2024-12-12T15:06:46.2120472Z","parent_msg_id":"f73996c7-08d7-4621-b654-4975b23615ab"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 48, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["['--phase', 'generateDocs']\narguments for client options are ['--phase', 'generateDocs', '--license', 'zinggLic.txt', '--email', 'zingg@zingg.ai', '--conf', 'dummyConf.json']\n"]}],"execution_count":27,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"da00dc40-2163-4247-bfef-21fa918ddfdd"},{"cell_type":"code","source":["DOCS_DIR = zinggDir + \"/\" + modelId + 
\"/docs/\""],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":50,"statement_ids":[50],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:24.1740612Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:24.7585436Z","execution_finish_time":"2024-12-12T15:11:25.0621234Z","parent_msg_id":"808875a7-ca97-42ba-b75c-ea92d72410a5"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 50, Finished, Available, Finished)"},"metadata":{}}],"execution_count":29,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0d4e3074-53a5-44a0-9b48-8f0f76a7c950"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"model.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":51,"statement_ids":[51],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:11:35.8141287Z","session_start_time":null,"execution_start_time":"2024-12-12T15:11:36.3540639Z","execution_finish_time":"2024-12-12T15:11:36.652124Z","parent_msg_id":"81153656-b2b8-4430-bc2a-d385f917e9a2"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 51, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n Zingg Model Documentation\n \n\n\n\n

\n \n\t \n\t\t \t\n\t\t\t\t\n\t\t \t\n\t \n
Unmarked 0/15, Marked 15/15 (9 Matches, 6 Non-Matches, 0 Unsure)
\n

\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Cluster z_score z_isMatch rec_id fname lname stNo add1 add2 city state dob ssn z_zsource
\n 1734007288465:0\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:1\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-2\n\n \n \n annalise\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-2\n\n \n \nannalise\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:12\n \n0\n\n \n \n0\n\n \n \nrec-1031-dup-0\n\n \n \nsamantha\n\n \n \nsabieray\n\n \n \n68\n\n \n \nquandong street\n\n \n \nwattle brae\n\n \n \n4019\n\n \n \nwa\n\n \n \n19590807\n\n \n \n2863290\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:3\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-4\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountv iew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-4\n\n \n \nkylee\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:4\n \n0\n\n \n \n1\n\n \n \nrec-1031-org\n\n \n \n emma\n\n \n \n crossman\n\n \n \n 53\n\n \n \n mcdowall place\n\n \n \n kellhaven\n\n \n \n 5608\n\n \n \n vic\n\n \n \n 19391027\n\n \n \n 3561186\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1031-org\n\n \n \nemma\n\n \n \ncrossman\n\n \n \n53\n\n \n \nmcdowall place\n\n \n \nkellhaven\n\n \n \n5608\n\n \n \nvic\n\n \n \n19391027\n\n \n \n3561186\n\n \n \ntestFebrl\n\n \n
\n 1734007288465:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-0\n\n \n \n kylee\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba anor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-dup-0\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nstoney creek\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:0\n \n0\n\n \n \n0\n\n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:1\n \n0\n\n \n \n1\n\n \n \nrec-1032-dup-0\n\n \n \nbrooklyn\n\n \n \nnaar-caftenas\n\n \n \n210\n\n \n \nduffy street\n\n \n \ntourist park\n\n \n \n2481\n\n \n \nnsw\n\n \n \n19840802\n\n \n \n3624304\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1032-dup-0\n\n \n \n brooklyn\n\n \n \n naar-caftenas\n\n \n \n 210\n\n \n \n duffy street\n\n \n \n tourist park\n\n \n \n 2481\n\n \n \n nsw\n\n \n \n 19840802\n\n \n \n 3624304\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:12\n \n0\n\n \n \n1\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1029-dup-1\n\n \n \nsachin\n\n \n \nstephenson\n\n \n \n81\n\n \n \nrose scott circuit\n\n \n \ncordoba manor\n\n \n \n4226\n\n \n \nvic\n\n \n \n19461101\n\n \n \n4783085\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:16\n \n0\n\n \n \n1\n\n \n \nrec-1034-org\n\n \n \n jasmine\n\n \n \n chang\n\n \n \n 210\n\n \n \n magnolia drive\n\n \n \n sunset valley\n\n \n \n 3021\n\n \n \n vic\n\n \n \n 19930203\n\n \n \n 4562381\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1034-org\n\n \n \njasmine\n\n \n \nchang\n\n \n \n210\n\n \n \nmagnolia drive\n\n \n \nsunset valley\n\n \n \n3021\n\n \n \nvic\n\n \n \n19930203\n\n \n \n4562381\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:2\n \n0\n\n \n \n1\n\n \n \nrec-1021-org\n\n \n \n thomas\n\n \n \n george\n\n \n \n 1\n\n \n \n mcmanus place\n\n \n \n north turramurra\n\n \n \n 3130\n\n \n \n sa\n\n \n \n 19630225\n\n \n \n 5460534\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1021-org\n\n \n \nthomas\n\n \n \ngeorge\n\n \n \n1\n\n \n \nmcmanus place\n\n \n \nnorth turramurra\n\n \n \n3130\n\n \n \nsa\n\n \n \n19630225\n\n \n \n5460534\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:3\n \n0\n\n \n \n1\n\n \n \nrec-1022-dup-0\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n mountview\n\n \n \n 2803\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-0\n\n \n \njackson\n\n \n \neglinton\n\n \n \n840\n\n \n \nfowles street\n\n \n \nmountview\n\n \n \n2803\n\n \n \nsa\n\n \n \n19830807\n\n \n \n2932837\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:4\n \n0\n\n \n \n1\n\n \n \nrec-1026-dup-0\n\n \n \n xani\n\n \n \n green\n\n \n \n 2\n\n \n \n phill ip avenue\n\n \n \n abbey green\n\n \n \n 5108\n\n \n \n nsw\n\n \n \n 19390410\n\n \n \n 9201057\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1026-dup-0\n\n \n \nxani\n\n \n \ngreen\n\n \n \n2\n\n \n \nphill ip avenue\n\n \n \nabbey green\n\n \n \n5108\n\n \n \nnsw\n\n \n \n19390410\n\n \n \n9201057\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:7\n \n0\n\n \n \n1\n\n \n \nrec-1033-org\n\n \n \n zachary\n\n \n \n mccarthy\n\n \n \n 134\n\n \n \n teal street\n\n \n \n greenwood\n\n \n \n 6024\n\n \n \n wa\n\n \n \n 19860219\n\n \n \n 3241102\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1033-org\n\n \n \nzachary\n\n \n \nmccarthy\n\n \n \n134\n\n \n \nteal street\n\n \n \ngreenwood\n\n \n \n6024\n\n \n \nwa\n\n \n \n19860219\n\n \n \n3241102\n\n \n \ntestFebrl\n\n \n
\n 1734014375837:8\n \n0\n\n \n \n0\n\n \n \nrec-1029-dup-1\n\n \n \n sachin\n\n \n \n stephenson\n\n \n \n 81\n\n \n \n rose scott circuit\n\n \n \n cordoba manor\n\n \n \n 4226\n\n \n \n vic\n\n \n \n 19461101\n\n \n \n 4783085\n\n \n \ntestFebrl\n\n \n
\n\n \n \n \n \n \nrec-1022-dup-1\n\n \n \n jackson\n\n \n \n eglinton\n\n \n \n 840\n\n \n \n fowles street\n\n \n \n moun tjiew\n\n \n \n 2830\n\n \n \n sa\n\n \n \n 19830807\n\n \n \n 2932837\n\n \n \ntestFebrl\n\n \n
\n \n\n

\n\n\n"},"metadata":{}}],"execution_count":30,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9e4ad578-f75f-4011-8027-dc565933adc6"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"data.html\", 'r').read())"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":52,"statement_ids":[52],"state":"finished","livy_statement_state":"available","session_id":"e8d52d7f-1f5d-4897-a638-4465746c84f8","normalized_state":"finished","queued_time":"2024-12-12T15:13:39.3741915Z","session_start_time":null,"execution_start_time":"2024-12-12T15:13:39.95129Z","execution_finish_time":"2024-12-12T15:13:40.2508845Z","parent_msg_id":"e6afa7a6-fd1b-454d-af86-38b6e6686506"},"text/plain":"StatementMeta(, e8d52d7f-1f5d-4897-a638-4465746c84f8, 52, Finished, Available, Finished)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"","text/html":"\n\n\tData Documentation\n\t\n\n\n\t\n\n\t\n\t\t\t\n\t\t\t\n\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\n\t
Field NameField TypeNullable
\n\t\t\t\t\trec_id\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tfname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tlname\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstNo\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd1\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tadd2\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tcity\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tstate\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tdob\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\t\t\t\t\tssn\n\t\t\t\t\t\n\t\t\t\t\tStringType\n\t\t\t\t\t\n\t\t\t\t\ttrue\n\t\t\t\t\t
\n\n\n\n"},"metadata":{}}],"execution_count":31,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e58aad4c-1ee3-4977-b211-ebeb9d7539c9"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"version_major":2,"version_minor":0,"state":{"0112614dd803438a986c77cfda539dba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cd7680c5c7d54872b46d824dfd45b61f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_04911938acd2486e8fc0ded740020ea1","style":"IPY_MODEL_ad77a508719f4730a16cf01475525150"}},"6f94a4de6db941189e6a0deabf52e2ad":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7f48a6c51c9f458a80deed26ea3b9011","IPY_MODEL_9efc44bbb2af482989a69577c7b793d0","IPY_MODEL_abc4ad768b3d4f75b3f6f8e3d9d3350d"],"layout":"IPY_MODEL_e0d2670f67e34eee81694ce7b7c97cd7"}},"0c26c8827bf54b95a4cc7d119b485e81":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e5b99552291e4649acf8760161e02ad9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"6a13045354274a089c720f0a3f6fc7b7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_a78ca3ab571448c09c99720e6914c9a5","IPY_MODEL_fd4beb5f2be94c609aed0730b98b9fea","IPY_MODEL_2019411034194afc8bea365fa7205623"],"layout":"IPY_MODEL_41e5e2f1dabe421d90c77a0af367cc74"}},"1a16c51638774862acb327afd5a6f057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ae4bd3e8f34741e7b87423cdaf49a198":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"01b2b8f50eb348cf9ee75f3145179cee":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8b71f2fe25b0404faedd772588744c33":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"7f48a6c51c9f458a80deed26ea3b9011":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_9f7543b4d79248bc8ecf6e9ce6bf31cf","style":"IPY_MODEL_241d4546ce8b4f0684be34c8b75eb58f"}},"d3bb974dd1f0490bb77dffaf8540d439":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_47e1703b3d45461f816b4ec1f8ea445a","style":"IPY_MODEL_8b71f2fe25b0404faedd772588744c33"}},"2266b285bd664631a0a6c9e89a35ed51":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"3af6c6b8d18d48ca89cbc4f5299f6f72":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e9d8900ddcf64682bbf5198fbf46f39d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7468229546d94bfcab6525edb9757637","tooltips":[],"style":"IPY_MODEL_f1bad4094ead437cbc0eda8372c538a8","icons":[]}},"63e74252206d4c5db3c7a350096b0435":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4cbbd9bb43ea4bcb82861e22c1478cf3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c26c8827bf54b95a4cc7d119b485e81","style":"IPY_MODEL_db63ca43d6934485987860bb1f441f29"}},"67d9530cacbf4bbe8144836c57e61acb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:81734014375837:8
rec_idrec-1029-dup-1rec-1022-dup-1
fname sachin jackson
lname stephenson eglinton
stNo 81 840
add1 rose scott circuit fowles street
add2 cordoba manor moun tjiew
city 4226 2830
state vic sa
dob 19461101 19830807
ssn 4783085 2932837
","layout":"IPY_MODEL_7862a64b0ced43e8b70b7f5684987936","style":"IPY_MODEL_2d427fa36cec488e8239a8c453efc375"}},"1829f914d5274fcc89106d626e3295de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7a6c3a89abf64a438aa69a6d0e63782e","IPY_MODEL_8b544a3eb42548698fec50307ca58cf0","IPY_MODEL_7ab4a49ee5cc4cd2bdc3a7b0cd066e29"],"layout":"IPY_MODEL_9d57f12f444b47b58f6982290bc17ba2"}},"d973662f8e8d4d80add362dc786e8325":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"ad77a508719f4730a16cf01475525150":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"39cadceacdbc4966a574c52a98c6260d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 6 record pairs is a match or not

","layout":"IPY_MODEL_5694a3ce6d8d4ae4b3022ded67aa7fd6","style":"IPY_MODEL_d973662f8e8d4d80add362dc786e8325"}},"8e9304290aab4a1fa38a89411af22922":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2d427fa36cec488e8239a8c453efc375":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9909b484567e49d3a2b619fec9e125b9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9fe8115b161a4a309887a31b449f2989":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"970014aa3a6b4acb981c239e49b5c8a1":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eedf22cb2361430099f8f6169cb418ea":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_f5e420d27b5d4c92bc8380c01cfa2151","IPY_MODEL_40544637e23545a1a6fc511777301f2d","IPY_MODEL_fcd49a0c3a1342b1bb6473cf90c1b88b"],"layout":"IPY_MODEL_f1be32a9a51445f98e99e3b4a2c697bb"}},"6225593e71364eb181cff48c1cfcfcc2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a78b5089adc74cd896d1e477251a4ac6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5306ed2302184ab8ba22c30999cb5572":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d1ca7f2a677e4e2783d6
60faee4c4701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_1f1ae689a00642b597a76f6721a06432","style":"IPY_MODEL_fe6677ee651742e1abf26212230c71af"}},"721f29e0f7664888a2936a3ceddafb6d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"23f62e8b7e2e4be1ae544202d2c1d38d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3fc421549e7425b815de2a3d01602d1","style":"IPY_MODEL_7f44c72c66414102acab1c2578025735"}},"4402fa32ec2e4f12afbd61344d431bcc":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"78889cdf217643fa9f4d114f1918b2f6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"083dbadeee3f4683a499f9b612768701":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c847d55d401e46bba108bca1bf8a7770","style":"IPY_MODEL_efade4d483f24f349d3d478be973b355"}},"1e2bcb99927b4a8cb5c7dd4eaac39225":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0371cfc91c0d421ab01ddd16b3972743":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_3bda20edce274aa7b1a92b98914530e1","IPY_MODEL_ccbf1dffd785415594fd880aa5cc8edf","IPY_MODEL_498839735d8f40018aca7aac0da8f5c9"],"layout":"IPY_MODEL_25e1281b496a4a958955a4d9091ca382"}},"01ee458406bc4bc7aae55eb99c0b504b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_af7596b42e5c4b9da6a85846c55f2092","IPY_MODEL_e3697e92e3e04c82b865bc3328dcad2b","IPY_MODEL_4c7afd0822eb4871b7708acbfb040fbf","IPY_MODEL_5d8d51ddc216416cb12979d0f38aae5a","IPY_MODEL_4ddf0fd6818343a58cee87bd452691eb","IPY_MODEL_a8bf95eb6af447ee89f946a9b6b4f1a9","IPY_MODEL_0371cfc91c0d421ab01ddd16b3972743","IPY_MODEL_804f5f862a2547cc833f3f27c18d69de","IPY_MODEL_b95905218e04479b8cba30790100004b","IPY_MODEL_55172f1685204f24a3b38debc635c6b9","IPY_MODEL_b47d111ecdf142a9bf96dea7cc00f12e","IPY_MODEL_0096a2bb367e4410ab96be94878df836","IPY_MODEL_9f688658e0a84aab86fb4b6e9b14eeb5","IPY_MODEL_6a13045354274a089c720f0a3f6fc7b7","IPY_MODEL_6f94a4de6db941189e6a0deabf52e2ad","IPY_MODEL_1829f914d5274fcc89106d626e3295de"],"layout":"IPY_MODEL_ddcfc3d0e90741c0a6c0b67b47f6f53d"}},"5423e9abb08d4175a8c593b60b35ad8d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"952a9f160893406791ec1975a5af971f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc724d1ceb584472a158a91de7b17cae":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:41734014375837:4
rec_idrec-1026-dup-0rec-1026-dup-0
fname xanixani
lname greengreen
stNo 22
add1 phill ip avenuephill ip avenue
add2 abbey greenabbey green
city 51085108
state nswnsw
dob 1939041019390410
ssn 92010579201057
","layout":"IPY_MODEL_f596ee340faa4691abdef6d010ff513c","style":"IPY_MODEL_9e7440ae7f6844f3a8c084a8379df095"}},"f75d9074d0674656b77cb99efcbfe37d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"498839735d8f40018aca7aac0da8f5c9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c3b9f4a35a1741cdab1b8127376790be","style":"IPY_MODEL_7ec772d0ae8d4365bd39d4a4b8050837"}},"942ce2043b974942801386f7fe813e59":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d7c93338fb5744a98060d36f29894737":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_29bb51c1b4b842d7992d0c6be6e582c8","style":"IPY_MODEL_5250e70ff02e4d219de6502a27b84357"}},"e23cfe9a93804558acc75418021aa409":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:01734014375837:0
rec_idrec-1022-dup-1rec-1029-dup-1
fname jacksonsachin
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 moun tjiewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_a36bb933f92c4ada82504e4c10570057","style":"IPY_MODEL_cbbfcbe143644072846912c9d8f1c6d7"}},"854564d76efa4e17b66c5e86ac9b8783":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_62d1842b557f49399311b9b573dac9d5","style":"IPY_MODEL_abea2c5d5ee14775a1e9c5a025bb83f2"}},"7ad966747291400d9013a2a2e2b26e10":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4c48892283394169b0911d6922a97058":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"56a4135e67644d0a83f0612cfe92fea8":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:161734014375837:16
rec_idrec-1034-orgrec-1034-org
fname jasminejasmine
lname changchang
stNo 210210
add1 magnolia drivemagnolia drive
add2 sunset valleysunset valley
city 30213021
state vicvic
dob 1993020319930203
ssn 45623814562381
","layout":"IPY_MODEL_4ebfc8728d2c4186a14ab0d9e52ca0c5","style":"IPY_MODEL_970014aa3a6b4acb981c239e49b5c8a1"}},"714d113c8c894968a03f8521e9c6bdf7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2019411034194afc8bea365fa7205623":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_08b9883f77f148c0be1916fbe711a94f","style":"IPY_MODEL_a6c854c673a54b54aa8f5894539a717c"}},"6020cfd838a84c38b42baee5e2ab5239":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c3b9f4a35a1741cdab1b8127376790be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f596ee340faa4691abdef6d010ff513c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6cc91e9e20d343679c6c32830b960faa":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"db916c8e786c40abb3db1432a9688e1d":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d7c93338fb5744a98060d36f29894737","IPY_MODEL_279fb85975df426a821e8f7e46c90f25","IPY_MODEL_786c8eb15f0c4f58b458338018aa8e49"],"layout":"IPY_MODEL_ecbd13d9937c463ba6b654348c05dde3"}},"0a1166c59f694b399f6c9bcbb1e6c89a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_6225593e71364eb181cff48c1cfcfcc2","style":"IPY_MODEL_e5b99552291e4649acf8760161e02ad9"}},"454c2074dba54875b5ee91c45e229169":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:11734007288465:1
rec_idrec-1029-dup-2rec-1029-dup-2
fname annaliseannalise
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_270b1bb9c8d740fbb2efecaf2e1f9f9d","style":"IPY_MODEL_8bc2bd72d40d4224a5fff0f2bccdcbd3"}},"18acd101aa8647c39f5a7c247cedf365":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:41734007288465:4
rec_idrec-1031-orgrec-1031-org
fname emmaemma
lname crossmancrossman
stNo 5353
add1 mcdowall placemcdowall place
add2 kellhavenkellhaven
city 56085608
state vicvic
dob 1939102719391027
ssn 35611863561186
","layout":"IPY_MODEL_4c48892283394169b0911d6922a97058","style":"IPY_MODEL_4fdc3a5116b54cb88adc45c257305421"}},"02ccf836a76444bd99fd508ed827e13a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:21734014375837:2
rec_idrec-1021-orgrec-1021-org
fname thomasthomas
lname georgegeorge
stNo 11
add1 mcmanus placemcmanus place
add2 north turramurranorth turramurra
city 31303130
state sasa
dob 1963022519630225
ssn 54605345460534
","layout":"IPY_MODEL_5423e9abb08d4175a8c593b60b35ad8d","style":"IPY_MODEL_d54363eed626420f910bfcfa01b2e420"}},"cc8a117379724417a5481bb9d17126b5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8684f0945a9048019a3165273fa674e6":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"feeb7fe2ee5a40e196cd16cfb2ae7635":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"fcd49a0c3a1342b1bb6473cf90c1b88b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_63e74252206d4c5db3c7a350096b0435","style":"IPY_MODEL_73bdd9f2969640ddba2a56ae39ceb6b7"}},"6722bf94601449c0a162116c1770e74b":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e7b43d6a420f46458c199aab46c9eb43":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7b6b2d02996344f3a8b829ce2ba14026","tooltips":[],"style":"IPY_MODEL_2a82f125b47641b983a65520897e61a9","icons":[]}},"261d645c4aa24c10ad9c02e75ee2d0b0":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"41e5e2f1dabe421d90c77a0af367cc74":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2a571eec79e4117b5c8dcc04d42ea8c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"efade4d483f24f349d3d478be973b355":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4ddf0fd6818343a58cee87bd452691eb":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_56a4135e67644d0a83f0612cfe92fea8","IPY_MODEL_e9d8900ddcf64682bbf5198fbf46f39d","IPY_MODEL_a16fae766e5c4828ac184a17e8da44f9"],"layout":"IPY_MODEL_721f29e0f7664888a2936a3ceddafb6d"}},"a8bf95eb6af447ee89f946a9b6b4f1a9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_02ccf836a76444bd99fd508
ed827e13a","IPY_MODEL_9bc94600605c4977ae1694a17888bd17","IPY_MODEL_d1ca7f2a677e4e2783d660faee4c4701"],"layout":"IPY_MODEL_937178220af4423daa2cd35aa8c3263a"}},"937178220af4423daa2cd35aa8c3263a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e3697e92e3e04c82b865bc3328dcad2b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e23cfe9a93804558acc75418021aa409","IPY_MODEL_482b6fc0521849dba90e938d82e68ed5","IPY_MODEL_854564d76efa4e17b66c5e86ac9b8783"],"layout":"IPY_MODEL_beea94f4506a4e83830588c4d4fcb1c7"}},"1320b18208d0404a8af38e1393051351":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"2dc9896b314544f3bd71c32c625e1175":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"435029d048944a1d8bfd7f3af18ffeba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"026ce8c3d7e24f86adada904417924cf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_a78b5089adc74cd896d1e477251a4ac6","tooltips":[],"style":"IPY_MODEL_e2385f8daa6b4e8faecbc68192b40d14","icons":[]}},"0a3dc99ab26f42bf90522b4eabb0ad21":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_727805949ef54a7da481fe155bc77b47","tooltips":[],"style":"IPY_MODEL_7a93d4ae0e91471ab30ca90034d9f90c","icons":[]}},"7ec772d0ae8d4365bd39d4a4b8050837":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"786c8eb15f0c4f58b458338018aa8e49":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_32c982d5fd3545ff8e0bc9cbbe3dc90f","style":"IPY_MODEL_0203adb880ca48e1a6ead1b5af804670"}},"abea2c5d5ee14775a1e9c5a025bb83f2":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2f67e4e809494262b3752db712d75ce7":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_18acd101aa8647c39f5a7c247cedf365","IPY_MODEL_4093238088364a1b934d6722c9468de8","IPY_MODEL_7d62968db1ae4f4c8d5e27028e99c6d3"],"layout":"IPY_MODEL_fb146a7c62e44aab94d15666c4afb50a"}},"a16fae766e5c4828ac184a17e8da44f9":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_9b29c240e7114680978ecef578ce5fd9","style":"IPY_MODEL_fe94e56c365f4bd8afcf9a57eced058e"}},"e1567066674b498ca58437b558f4ee8e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:71734014375837:7
rec_idrec-1033-orgrec-1033-org
fname zacharyzachary
lname mccarthymccarthy
stNo 134134
add1 teal streetteal street
add2 greenwoodgreenwood
city 60246024
state wawa
dob 1986021919860219
ssn 32411023241102
","layout":"IPY_MODEL_c24d9d54deb84bbab0da6405aea82569","style":"IPY_MODEL_6722bf94601449c0a162116c1770e74b"}},"4c7afd0822eb4871b7708acbfb040fbf":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_64f8752992414e9aa3b677911f0d4848","IPY_MODEL_dacefcb9fc10425e80c5233cb0ba4ffd","IPY_MODEL_2757b91608934f0daa7d9f2397a65d8d"],"layout":"IPY_MODEL_514b19922da24f17bb39aa72d78beaf4"}},"9efc44bbb2af482989a69577c7b793d0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_0112614dd803438a986c77cfda539dba","tooltips":[],"style":"IPY_MODEL_825e88947fcc454498b4739c0757c97d","icons":[]}},"afac862e71a043c381874456054c5e41":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9bc94600605c4977ae1694a17888bd17":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_ae4bd3e8f34741e7b87423cdaf49a198","tooltips":[],"style":"IPY_MODEL_4be40990a33d4872871d58e52d09d898","icons":[]}},"e2385f8daa6b4e8faecbc68192b40d14":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"b47d111ecdf142a9bf96dea7cc00f12e":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_10fadcb3c1214044b997e0d2668bd9d3","IPY_MODEL_75ca0d3400af41f0a754c346a121c9b6","IPY_MODEL_91b4da3856884938987c6d2cf5751f9f"],"layout":"IPY_MODEL_8a0d5bc35d6746959993d76e767f4bc8"}},"b72e35612aa7407890a329608f3f0d49":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d2809335c95b4235b0ca86feab6b14d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_f3c9cd7b31a84fb4bd262c69b122e11d","tooltips":[],"style":"IPY_MODEL_8e9304290aab4a1fa38a89411af22922","icons":[]}},"44acc8fae0314cb7a33463d2bc6353e7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3a2907ac772b46ed81c079f41434c74b":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"d0d57063e8b144b49970df32c53ce162":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_882d27a063a94986bc304b02c5222b7a","tooltips":[],"style":"IPY_MODEL_0d2c43c11f554f02b9b0e521a02df66f","icons":[]}},"085d7c0804ab4af6bb42b2928a6c2bd5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"98d458cfcd874e2c8af3998379e6c432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3bda20edce274aa7b1a92b98914530e1":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:31734014375837:3
rec_idrec-1022-dup-0rec-1022-dup-0
fname jacksonjackson
lname eglintoneglinton
stNo 840840
add1 fowles streetfowles street
add2 mountviewmountview
city 28032803
state sasa
dob 1983080719830807
ssn 29328372932837
","layout":"IPY_MODEL_181192c2388e4db190a751c4042e238a","style":"IPY_MODEL_eb072c0a62a24f03b150bc624aad5a5d"}},"fe6677ee651742e1abf26212230c71af":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"29bb51c1b4b842d7992d0c6be6e582c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7b6b2d02996344f3a8b829ce2ba14026":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"19ffca6433c14da198770adae02221be":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17243a3f0b654e11970f9b5bce82f79c":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_d3f5a5077c9b441e832429ae5a364fbc","IPY_MODEL_7661a6f07c404d3392d0834ebb51f2d5","IPY_MODEL_4cbbd9bb43ea4bcb82861e22c1478cf3"],"layout":"IPY_MODEL_1a16c51638774862acb327afd5a6f057"}},"b2130bed69ca4703acb121ebccd506ca":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2a82f125b47641b983a65520897e61a9":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"9b29c240e7114680978ecef578ce5fd9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5694a3ce6d8d4ae4b3022ded67aa7fd6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d3f5a5077c9b441e832429ae5a364fbc":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_085d7c0804ab4af6bb42b2928a6c2bd5","style":"IPY_MODEL_754c27d772534ecaaedab5591427ca09"}},"db63ca43d6934485987860bb1f441f29":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"9f7543b4d79248bc8ecf6e9ce6bf31cf":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"1f1ae689a00642b597a76f6721a06432":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0203adb880ca48e1a6ead1b5af804670":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4fdc3a5116b54cb88adc45c257305421":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5e173e9779fd4ca08143464fd42bdf62":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"214f3e7e895d4f54bbaa829b69ca8671":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"081d75be0414491faaccaec2648ddcd9":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"514b19922da24f17bb39aa72d78beaf4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"595a260ac98d49e6894496961fa7701c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5250e70ff02e4d219de6502a27b84357":{"model_name":"HTMLStyleMode
l","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"fe94e56c365f4bd8afcf9a57eced058e":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"10fadcb3c1214044b997e0d2668bd9d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:01734007288465:0
rec_idrec-1029-dup-0rec-1031-dup-0
fname kyleesamantha
lname stephensonsabieray
stNo 8168
add1 rose scott circuitquandong street
add2 cordoba anorwattle brae
city 42264019
state vicwa
dob 1946110119590807
ssn 47830852863290
","layout":"IPY_MODEL_805ed2cf73364f13addeaf13a8073620","style":"IPY_MODEL_115453304b8e477a96726060b0c509ad"}},"da34c9ff8e3b4738a59ec9eb0a39d2cb":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"aed62bd42df24b5788b0fa4f6e8fb610":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7f44c72c66414102acab1c2578025735":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"69c523dee7d54c3b8f0620ad2eb6dc51":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_0c96ba84dad84dbfb3b8347e9e7ae748","style":"IPY_MODEL_6020cfd838a84c38b42baee5e2ab5239"}},"25e1281b496a4a958955a4d9091ca382":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"727805949ef54a7da481fe155bc77b47":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b0d572405b3344278a443aa21138d927":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17f6fddf67e242588f39e2aaf0558678":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"91b4da3856884938987c6d2cf5751f9f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_318d9d146d1f41ee9a169043637dadb7","style":"IPY_MODEL_dad9c9e2d53744f4a2284917a78fd931"}},"7a93d4ae0e91471ab30ca90034d9f90c":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"0d2c43c11f554f02b9b0e521a02df66f":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"279fb85975df426a821e8f7e46c90f25":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_9e8426a14afa4c95bf89465efe99089f","tooltips":[],"style":"IPY_MODEL_47acc27c5bb047009eecaa7aa4974cac","icons":[]}},"f3c9cd7b31a84fb4bd262c69b122e11d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a6c854c673a54b54aa8f5894539a717c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c86d53a9d8394704aaa74e27d7569cc0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6542b2868c0c43359d500c3828ef12ef":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:121734014375837:12
rec_idrec-1029-dup-1rec-1029-dup-1
fname sachinsachin
lname stephensonstephenson
stNo 8181
add1 rose scott circuitrose scott circuit
add2 cordoba manorcordoba manor
city 42264226
state vicvic
dob 1946110119461101
ssn 47830854783085
","layout":"IPY_MODEL_3af6c6b8d18d48ca89cbc4f5299f6f72","style":"IPY_MODEL_e2a571eec79e4117b5c8dcc04d42ea8c"}},"dad9c9e2d53744f4a2284917a78fd931":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"708a2ae873f8426fade245382a8c9208":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_cd7680c5c7d54872b46d824dfd45b61f","IPY_MODEL_012518d9797f4087a352a23bf5ba2aaf","IPY_MODEL_4150bb26c66d4de4954e13af8d0cd781"],"layout":"IPY_MODEL_aed62bd42df24b5788b0fa4f6e8fb610"}},"ccbf1dffd785415594fd880aa5cc8edf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_01b2b8f50eb348cf9ee75f3145179cee","tooltips":[],"style":"IPY_MODEL_5e173e9779fd4ca08143464fd42bdf62","icons":[]}},"788b34a5563a423798cb54ff8d7b996c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"804f5f862a2547cc833f3f27c18d69de":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_fc724d1ceb584472a158a91de7b17cae","IPY_MODEL_d2809335c95b4235b0ca86feab6b14d1","IPY_MODEL_23f62e8b7e2e4be1ae544202d2c1d38d"],"layout":"IPY_MODEL_714d113c8c894968a03f8521e9c6bdf7"}},"4be40990a33d4872871d58e52d09d898":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"4093238088364a1b934d6722c9468de8":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","N
o Match"],"button_style":"info","layout":"IPY_MODEL_17a7abd324054f039724fb423e2a67a4","tooltips":[],"style":"IPY_MODEL_afac862e71a043c381874456054c5e41","icons":[]}},"fb146a7c62e44aab94d15666c4afb50a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0c96ba84dad84dbfb3b8347e9e7ae748":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3308de4749240c6bcd404cb4caf7ee4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22483139248d470ca2edbb0b22a669d1":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_c86d53a9d8394704aaa74e27d7569cc0","tooltips":[],"style":"IPY_MODEL_77d77f14d7254453909994ace6b43eb5","icons":[]}},"270b1bb9c8d740fbb2efecaf2e1f9f9d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7af3659f738046f0a562d772fba7aadd":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"64f8752992414e9aa3b677911f0d4848":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734014375837:11734014375837:1
rec_idrec-1032-dup-0rec-1032-dup-0
fname brooklynbrooklyn
lname naar-caftenasnaar-caftenas
stNo 210210
add1 duffy streetduffy street
add2 tourist parktourist park
city 24812481
state nswnsw
dob 1984080219840802
ssn 36243043624304
","layout":"IPY_MODEL_6cc91e9e20d343679c6c32830b960faa","style":"IPY_MODEL_b345a2da49d84b559a59792c488d0c1f"}},"9e7440ae7f6844f3a8c084a8379df095":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"62d1842b557f49399311b9b573dac9d5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"47acc27c5bb047009eecaa7aa4974cac":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8bc2bd72d40d4224a5fff0f2bccdcbd3":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"4abfebecf35e47b8bdab070a428d4a77":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"451cd21ac7b64517b93824dd5ab79460":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2757b91608934f0daa7d9f2397a65d8d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_2292728174764b0bb766d983d2d8f272","style":"IPY_MODEL_2266b285bd664631a0a6c9e89a35ed51"}},"b95905218e04479b8cba30790100004b":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e1567066674b498ca58437b558f4ee8e","IPY_MODEL_8d8dc1ef9db8403dbe741141f95578e6","IPY_MODEL_083dbadeee3f4683a499f9b612768701"],"layout":"IPY_MODEL_435029d048944a1d8bfd7f3af18ffeba"}},"754c27d772534ecaaedab5591427ca09":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"26877fd9c74e49a999f8134e2d8a41d2":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_0a1166c59f694b399f6c9bcbb1e6c89a","IPY_MODEL_d0d57063e8b144b49970df32c53ce162","IPY_MODEL_b3ce0440576c4d22a90b74ecfddf9afb"],"layout":"IPY_MODEL_139af57eb88742fdaf311e40157b4c1b"}},"a78ca3ab571448c09c99720e6914c9a5":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:31734007288465:3
rec_idrec-1022-dup-4rec-1029-dup-4
fname jacksonkylee
lname eglintonstephenson
stNo 84081
add1 fowles streetrose scott circuit
add2 mountv iewcordoba manor
city 28304226
state savic
dob 1983080719461101
ssn 29328374783085
","layout":"IPY_MODEL_f6f566807665447d8947ef4f1c1cb802","style":"IPY_MODEL_081d75be0414491faaccaec2648ddcd9"}},"482b6fc0521849dba90e938d82e68ed5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b72e35612aa7407890a329608f3f0d49","tooltips":[],"style":"IPY_MODEL_f75d9074d0674656b77cb99efcbfe37d","icons":[]}},"2a7ce010e31c474d834773f51158ad6c":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"32c982d5fd3545ff8e0bc9cbbe3dc90f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"dacefcb9fc10425e80c5233cb0ba4ffd":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_22aaffab00674834860abe4b7df78f36","tooltips":[],"style":"IPY_MODEL_3a2907ac772b46ed81c079f41434c74b","icons":[]}},"f5e420d27b5d4c92bc8380c01cfa2151":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:121734007288465:12
rec_idrec-1031-dup-0rec-1021-dup-0
fnamesamanthathomas
lnamesabieraygeorge
stNo681
add1quandong streetmcmanus place
add2wattle braestoney creek
city40193130
statewasa
dob1959080719630225
ssn28632905460534
","layout":"IPY_MODEL_b2130bed69ca4703acb121ebccd506ca","style":"IPY_MODEL_942ce2043b974942801386f7fe813e59"}},"77d77f14d7254453909994ace6b43eb5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"f6f566807665447d8947ef4f1c1cb802":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ecbd13d9937c463ba6b654348c05dde3":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8a0d5bc35d6746959993d76e767f4bc8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"805ed2cf73364f13addeaf13a8073620":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22aaffab00674834860abe4b7df78f36":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc7bff94e2684f51b8ff148cdf04d0ff":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_39cadceacdbc4966a574c52a98c6260d","IPY_MODEL_17243a3f0b654e11970f9b5bce82f79c","IPY_MODEL_26877fd9c74e49a999f8134e2d8a41d2","IPY_MODEL_eedf22cb2361430099f8f6169cb418ea","IPY_MODEL_708a2ae873f8426fade245382a8c9208","IPY_MODEL_2f67e4e809494262b3752db712d75ce7","IPY_MODEL_db916c8e786c40abb3db1432a9688e1d"],"layout":"IPY_MODEL_214f3e7e895d4f54bbaa829b69ca8671"}},"9f688658e0a84aab86fb4b6e9b14eeb5":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_69c523dee7d54c3b8f0620ad2eb6dc51","IPY_MODEL_026ce8c3d7e24f86adada904417924cf","IPY_MODEL_5227aa6fa7c749238d811d462cb0fe36"],"layout":"IPY_MODEL_bd88f0c19aff4c1cb0bd3a5c52db200b"}},"d7ab081b539e42649eef86e6f7b6c76d":{"model_name":"LayoutModel","model_module":"@jupyter
-widgets/base","model_module_version":"2.0.0","state":{}},"b59772ab1d914a24bcb3a77947962f2c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"08b9883f77f148c0be1916fbe711a94f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7468229546d94bfcab6525edb9757637":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"17a7abd324054f039724fb423e2a67a4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fbf9d80d166744d88c66208824d17c24":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0c49cc29fbd04b46b38f410912a180d9","style":"IPY_MODEL_b27b76432a684b6980b5052cadfea618"}},"e0d2670f67e34eee81694ce7b7c97cd7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d54363eed626420f910bfcfa01b2e420":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"241d4546ce8b4f0684be34c8b75eb58f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2292728174764b0bb766d983d2d8f272":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"af7596b42e5c4b9da6a85846c55f2092":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 15 record pairs is a match or not

","layout":"IPY_MODEL_4abfebecf35e47b8bdab070a428d4a77","style":"IPY_MODEL_4402fa32ec2e4f12afbd61344d431bcc"}},"8d8dc1ef9db8403dbe741141f95578e6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":1,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b59772ab1d914a24bcb3a77947962f2c","tooltips":[],"style":"IPY_MODEL_8684f0945a9048019a3165273fa674e6","icons":[]}},"7d62968db1ae4f4c8d5e27028e99c6d3":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_78889cdf217643fa9f4d114f1918b2f6","style":"IPY_MODEL_261d645c4aa24c10ad9c02e75ee2d0b0"}},"c3fc421549e7425b815de2a3d01602d1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"825e88947fcc454498b4739c0757c97d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"882d27a063a94986bc304b02c5222b7a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ddcfc3d0e90741c0a6c0b67b47f6f53d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"55172f1685204f24a3b38debc635c6b9":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_67d9530cacbf4bbe8144836c57e61acb","IPY_MODEL_e7b43d6a420f46458c199aab46c9eb43","IPY_MODEL_fbf9d80d166744d88c66208824d17c24"],"layout":"IPY_MODEL_19ffca6433c14da198770adae02221be"}},"73bdd9f2969640ddba2a56ae39ceb6b7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"40544637e23545a1a6fc511777301f2d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_7d018bb285e1499692cbb241516046f2","tooltips":[],"style":"IPY_MODEL_e2d942ea35174426aa46171c6348c308","icons":[]}},"c847d55d401e46bba108bca1bf8a7770":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1bad4094ead437cbc0eda8372c538a8":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"115453304b8e477a96726060b0c509ad":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"b27b76432a684b6980b5052cadfea618":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"eb072c0a62a24f03b150bc624aad5a5d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"bd88f0c19aff4c1cb0bd3a5c52db200b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"139af57eb88742fdaf311e40157b4c1b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9e8426a14afa4c95bf89465efe99089f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"653d6750617f4c788c17ae743b0da13b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"abc4ad768b3d4f75b3f6f8e3d9d3350d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_98d458cfcd874e2c8af3998379e6c432","style":"IPY_MODEL_a7171853339643a48382ec125a26944d"}},"0096a2bb367e4410ab96be94878df836":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_454c2074dba54875b5ee91c45e229169","IPY_MODEL_0a3dc99ab26f42bf90522b4eabb0ad21","IPY_MODEL_d3bb974dd1f0490bb77dffaf8540d439"],"layout":"IPY_MODEL_7ad966747291400d9013a2a2e2b26e10"}},"0c49cc29fbd04b46b38f410912a180d9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4150bb26c66d4de4954e13af8d0cd781":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b3308de4749240c6bcd404cb4caf7ee4","style":"IPY_MODEL_595a260ac98d49e6894496961fa7701c"}},"181192c2388e4db190a751c4042e238a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ab4a49ee5cc4cd2bdc3a7b0cd066e29":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_7ef6892a4e7444458465dd5a5e76fae5","style":"IPY_MODEL_788b34a5563a423798cb54ff8d7b996c"}},"beea94f4506a4e83830588c4d4fcb1c7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a36bb933f92c4ada82504e4c10570057":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e2d942ea35174426aa46171c6348c308":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"7862a64b0ced43e8b70b7f5684987936":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3ce0440576c4d22a90b74ecfddf9afb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_b0d572405b3344278a443aa21138d927","style":"IPY_MODEL_9fe8115b161a4a309887a31b449f2989"}},"7661a6f07c404d3392d0834ebb51f2d5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_7af3659f738046f0a562d772fba7aadd","tooltips":[],"style":"IPY_MODEL_5306ed2302184ab8ba22c30999cb5572","icons":[]}},"fd4beb5f2be94c609aed0730b98b9fea":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_d7ab081b539e42649eef86e6f7b6c76d","tooltips":[],"style":"IPY_MODEL_9909b484567e49d3a2b619fec9e125b9","icons":[]}},"c24d9d54deb84bbab0da6405aea82569":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7d018bb285e1499692cbb241516046f2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"012518d9797f4087a352a23bf5ba2aaf":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_653d6750617f4c788c17ae743b0da13b","tooltips":[],"style":"IPY_MODEL_1320b18208d0404a8af38e1393051351","icons":[]}},"75ca0d3400af41f0a754c346a121c9b6":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_1e2bcb99927b4a8cb5c7dd4eaac39225","tooltips":[],"style":"IPY_MODEL_feeb7fe2ee5a40e196cd16cfb2ae7635","icons":[]}},"7a6c3a89abf64a438aa69a6d0e63782e":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1734007288465:81734007288465:8
rec_idrec-1029-dup-0rec-1021-dup-0
fname kyleethomas
lname stephensongeorge
stNo 811
add1 rose scott circuitmcmanus place
add2 cordoba anorstoney creek
city 42263130
state vicsa
dob 1946110119630225
ssn 47830855460534
","layout":"IPY_MODEL_2dc9896b314544f3bd71c32c625e1175","style":"IPY_MODEL_2a7ce010e31c474d834773f51158ad6c"}},"8b544a3eb42548698fec50307ca58cf0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":2,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_6ff19e3e507c4bebafd8a1bff6ce55c8","tooltips":[],"style":"IPY_MODEL_cc8a117379724417a5481bb9d17126b5","icons":[]}},"318d9d146d1f41ee9a169043637dadb7":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"cbbfcbe143644072846912c9d8f1c6d7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"5227aa6fa7c749238d811d462cb0fe36":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_44acc8fae0314cb7a33463d2bc6353e7","style":"IPY_MODEL_451cd21ac7b64517b93824dd5ab79460"}},"c80f86a431824631b6626eba7c46fc33":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_17f6fddf67e242588f39e2aaf0558678","style":"IPY_MODEL_da34c9ff8e3b4738a59ec9eb0a39d2cb"}},"47e1703b3d45461f816b4ec1f8ea445a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9d57f12f444b47b58f6982290bc17ba2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b345a2da49d84b559a59792c488d0c1f":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"04911938acd2486e8fc0ded740020ea1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7ef6892a4e7444458465dd5a5e76fae5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"5d8d51ddc216416cb12979d0f38aae5a":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_6542b2868c0c43359d500c3828ef12ef","IPY_MODEL_22483139248d470ca2edbb0b22a669d1","IPY_MODEL_c80f86a431824631b6626eba7c46fc33"],"layout":"IPY_MODEL_952a9f160893406791ec1975a5af971f"}},"4ebfc8728d2c4186a14ab0d9e52ca0c5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6ff19e3e507c4bebafd8a1bff6ce55c8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f1be32a9a51445f98e99e3b4a2c697bb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a7171853339643a48382ec125a26944d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}}}}},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.ti
meout":"2400000"}}},"dependencies":{"lakehouse":{"default_lakehouse":"36ef8bc2-c67a-4512-b060-e25489729c71","default_lakehouse_name":"data","default_lakehouse_workspace_id":"e803987a-98b6-445f-815c-3d15c2c46877","known_lakehouses":[{"id":"7e68da48-69ac-4253-b7bf-1f24863ab25a"},{"id":"1ca5fe82-c7a1-494d-825d-9168c65112d1"},{"id":"36ef8bc2-c67a-4512-b060-e25489729c71"}]},"environment":{"environmentId":"1ae2ef87-3a76-4cd3-90b5-e829f7a4ca9c","workspaceId":"e803987a-98b6-445f-815c-3d15c2c46877"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file