Skip to content

Commit

Permalink
Merge pull request #1126 from lingbai-kong/parse_imdb
Browse files Browse the repository at this point in the history
Add pad preprocessing for `imdb` dataset
  • Loading branch information
Oceania2018 authored Jul 1, 2023
2 parents 3acfc1d + 4efa0a8 commit dfd9dd0
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions src/TensorFlowNET.Keras/Datasets/Imdb.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public DatasetPass load_data(string path = "imdb.npz",
int oov_char= 2,
int index_from = 3)
{
if (maxlen == -1) throw new InvalidArgumentError("maxlen must be assigned.");

var dst = Download();

var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt"));
Expand All @@ -51,7 +53,7 @@ public DatasetPass load_data(string path = "imdb.npz",
x_train_string[i] = lines[i].Substring(2);
}

var x_train = np.array(x_train_string);
var x_train = keras.preprocessing.sequence.pad_sequences(PraseData(x_train_string), maxlen: maxlen);

File.ReadAllLines(Path.Combine(dst, "imdb_test.txt"));
var x_test_string = new string[lines.Length];
Expand All @@ -62,7 +64,7 @@ public DatasetPass load_data(string path = "imdb.npz",
x_test_string[i] = lines[i].Substring(2);
}

var x_test = np.array(x_test_string);
var x_test = keras.preprocessing.sequence.pad_sequences(PraseData(x_test_string), maxlen: maxlen);

return new DatasetPass
{
Expand Down Expand Up @@ -93,5 +95,23 @@ string Download()
return dst;
// return Path.Combine(dst, file_name);
}

/// <summary>
/// Converts textual integer lists (for example <c>"[1, 2, 3]"</c>) into integer arrays,
/// producing one array per input string.
/// </summary>
/// <param name="x">
/// Strings holding comma-separated integers. Square brackets and spaces are stripped
/// before parsing.
/// </param>
/// <returns>
/// One <c>int[]</c> per element of <paramref name="x"/>, in the same order. A string with
/// no integer tokens (such as <c>"[]"</c> or <c>""</c>) yields an empty array.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="x"/> is null.</exception>
/// <exception cref="System.FormatException">A token is not a valid integer.</exception>
protected IEnumerable<int[]> PraseData(string[] x)
{
    if (x == null)
    {
        throw new System.ArgumentNullException(nameof(x));
    }

    // The result has exactly one entry per input string, so the capacity is known up front.
    var data_list = new List<int[]>(x.Length);
    var separators = new[] { ',' };

    for (int i = 0; i < x.Length; i++)
    {
        var cleaned_list_string = x[i].Replace("[", "").Replace("]", "").Replace(" ", "");

        // RemoveEmptyEntries keeps "[]", "" and stray/trailing commas from producing an
        // empty token, which int.Parse would reject with a FormatException.
        string[] number_strings = cleaned_list_string.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);

        int[] numbers = new int[number_strings.Length];
        for (int j = 0; j < number_strings.Length; j++)
        {
            // The input is machine-generated data, so parse it independently of the
            // current thread culture.
            numbers[j] = int.Parse(
                number_strings[j],
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);
        }

        data_list.Add(numbers);
    }

    return data_list;
}
}
}

0 comments on commit dfd9dd0

Please sign in to comment.