Skip to content

Commit

Permalink
Add UTF-8 charset to String(byte[]) constructions
Browse files (browse the repository at this point in the history)
- Add UTF-8 charset to new String constructions that use a byte[]
  as argument, so that decoding no longer depends on the platform
  default charset.

Fixes apache#4765
  • Loading branch information
meatballspaghetti committed Aug 7, 2024
1 parent 8e31fe9 commit c53290c
Show file tree
Hide file tree
Showing 23 changed files with 106 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.accumulo.core.clientImpl;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.easymock.EasyMock.replay;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
Expand Down Expand Up @@ -271,7 +272,7 @@ private void runTest(TabletLocatorImpl metaCache, List<Mutation> ml,
ArrayList<String> efs = new ArrayList<>(List.of(efailures));

for (Mutation mutation : afailures) {
afs.add(new String(mutation.getRow()));
afs.add(new String(mutation.getRow(), UTF_8));
}

Collections.sort(afs);
Expand All @@ -296,7 +297,7 @@ private void verify(Map<String,Map<KeyExtent,List<String>>> expected,
ArrayList<String> aRows = new ArrayList<>();

for (Mutation m : atb.getMutations().get(ke)) {
aRows.add(new String(m.getRow()));
aRows.add(new String(m.getRow(), UTF_8));
}

Collections.sort(eRows);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.accumulo.core.conf;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand All @@ -38,19 +39,19 @@ public void testAuthentication() {
assertEquals("testpass1", ClientProperty.AUTH_TOKEN.getValue(props));
AuthenticationToken token = ClientProperty.getAuthenticationToken(props);
assertTrue(token instanceof PasswordToken);
assertEquals("testpass1", new String(((PasswordToken) token).getPassword()));
assertEquals("testpass1", new String(((PasswordToken) token).getPassword(), UTF_8));

ClientProperty.setAuthenticationToken(props, new PasswordToken("testpass2"));
assertEquals("/////gAAAAl0ZXN0cGFzczI=", ClientProperty.AUTH_TOKEN.getValue(props));
token = ClientProperty.getAuthenticationToken(props);
assertTrue(token instanceof PasswordToken);
assertEquals("testpass2", new String(((PasswordToken) token).getPassword()));
assertEquals("testpass2", new String(((PasswordToken) token).getPassword(), UTF_8));

ClientProperty.setAuthenticationToken(props, new PasswordToken("testpass3"));
assertEquals("/////gAAAAl0ZXN0cGFzczM=", ClientProperty.AUTH_TOKEN.getValue(props));
token = ClientProperty.getAuthenticationToken(props);
assertTrue(token instanceof PasswordToken);
assertEquals("testpass3", new String(((PasswordToken) token).getPassword()));
assertEquals("testpass3", new String(((PasswordToken) token).getPassword(), UTF_8));

ClientProperty.setKerberosKeytab(props, "/path/to/keytab");
assertEquals("/path/to/keytab", ClientProperty.AUTH_TOKEN.getValue(props));
Expand Down
75 changes: 38 additions & 37 deletions core/src/test/java/org/apache/accumulo/core/data/MutationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ public void test1() {

ColumnUpdate cu = updates.get(0);

assertEquals("cf1", new String(cu.getColumnFamily()));
assertEquals("cq1", new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf1", new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq1", new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertFalse(cu.hasTimestamp());

}
Expand All @@ -94,41 +94,41 @@ public void test2() throws IOException {

assertEquals(2, updates.size());

assertEquals("r1", new String(m.getRow()));
assertEquals("r1", new String(m.getRow(), UTF_8));
ColumnUpdate cu = updates.get(0);

assertEquals("cf1", new String(cu.getColumnFamily()));
assertEquals("cq1", new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf1", new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq1", new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertFalse(cu.hasTimestamp());

cu = updates.get(1);

assertEquals("cf2", new String(cu.getColumnFamily()));
assertEquals("cq2", new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf2", new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq2", new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertTrue(cu.hasTimestamp());
assertEquals(56, cu.getTimestamp());

m = cloneMutation(m);

assertEquals("r1", new String(m.getRow()));
assertEquals("r1", new String(m.getRow(), UTF_8));
updates = m.getUpdates();

assertEquals(2, updates.size());

cu = updates.get(0);

assertEquals("cf1", new String(cu.getColumnFamily()));
assertEquals("cq1", new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf1", new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq1", new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertFalse(cu.hasTimestamp());

cu = updates.get(1);

assertEquals("cf2", new String(cu.getColumnFamily()));
assertEquals("cq2", new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf2", new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq2", new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertTrue(cu.hasTimestamp());
assertEquals(56, cu.getTimestamp());

Expand Down Expand Up @@ -163,14 +163,14 @@ public void test3() throws IOException {
}

for (int r = 0; r < 3; r++) {
assertEquals("r1", new String(m.getRow()));
assertEquals("r1", new String(m.getRow(), UTF_8));
List<ColumnUpdate> updates = m.getUpdates();
assertEquals(5, updates.size());
for (int i = 0; i < 5; i++) {
ColumnUpdate cu = updates.get(i);
assertEquals("cf" + i, new String(cu.getColumnFamily()));
assertEquals("cq" + i, new String(cu.getColumnQualifier()));
assertEquals("", new String(cu.getColumnVisibility()));
assertEquals("cf" + i, new String(cu.getColumnFamily(), UTF_8));
assertEquals("cq" + i, new String(cu.getColumnQualifier(), UTF_8));
assertEquals("", new String(cu.getColumnVisibility(), UTF_8));
assertFalse(cu.hasTimestamp());

byte[] val = cu.getValue();
Expand Down Expand Up @@ -296,7 +296,8 @@ public void testAtVisiblityTypes() {

// vis: CharSequence (String implementation)
actual = new Mutation("row5");
actual.at().family(fam).qualifier(qual).visibility(new String(vis.getExpression())).put(val);
actual.at().family(fam).qualifier(qual).visibility(new String(vis.getExpression(), UTF_8))
.put(val);
assertEquals(expected, actual);

// vis: ColumnVisibility
Expand Down Expand Up @@ -488,7 +489,7 @@ public void testPutsStringString() {
m.putDelete("cf8", "cq8", new ColumnVisibility("cv8"), 8L);

assertEquals(8, m.size());
assertEquals("r1", new String(m.getRow()));
assertEquals("r1", new String(m.getRow(), UTF_8));

List<ColumnUpdate> updates = m.getUpdates();

Expand Down Expand Up @@ -555,14 +556,14 @@ public void testMultipleReadFieldsCalls() throws IOException {
m1.put("cf1.2", "cq1.2", new ColumnVisibility("C|D"), "val1.2");
byte[] val1_3 = new byte[Mutation.VALUE_SIZE_COPY_CUTOFF + 3];
Arrays.fill(val1_3, (byte) 3);
m1.put("cf1.3", "cq1.3", new ColumnVisibility("E|F"), new String(val1_3));
m1.put("cf1.3", "cq1.3", new ColumnVisibility("E|F"), new String(val1_3, UTF_8));
int size1 = m1.size();
long nb1 = m1.numBytes();

Mutation m2 = new Mutation("row2");
byte[] val2 = new byte[Mutation.VALUE_SIZE_COPY_CUTOFF + 2];
Arrays.fill(val2, (byte) 2);
m2.put("cf2", "cq2", new ColumnVisibility("G|H"), 1234, new String(val2));
m2.put("cf2", "cq2", new ColumnVisibility("G|H"), 1234, new String(val2, UTF_8));
int size2 = m2.size();
long nb2 = m2.numBytes();

Expand All @@ -580,39 +581,39 @@ public void testMultipleReadFieldsCalls() throws IOException {
Mutation m = new Mutation();
m.readFields(dis);

assertEquals("row1", new String(m.getRow()));
assertEquals("row1", new String(m.getRow(), UTF_8));
assertEquals(size1, m.size());
assertEquals(nb1, m.numBytes());
assertEquals(3, m.getUpdates().size());
verifyColumnUpdate(m.getUpdates().get(0), "cf1.1", "cq1.1", "A|B", 0L, false, false, "val1.1");
verifyColumnUpdate(m.getUpdates().get(1), "cf1.2", "cq1.2", "C|D", 0L, false, false, "val1.2");
verifyColumnUpdate(m.getUpdates().get(2), "cf1.3", "cq1.3", "E|F", 0L, false, false,
new String(val1_3));
new String(val1_3, UTF_8));

// Reuse the same mutation object (which is what happens in the hadoop framework
// when objects are read by an input format)
m.readFields(dis);

assertEquals("row2", new String(m.getRow()));
assertEquals("row2", new String(m.getRow(), UTF_8));
assertEquals(size2, m.size());
assertEquals(nb2, m.numBytes());
assertEquals(1, m.getUpdates().size());
verifyColumnUpdate(m.getUpdates().get(0), "cf2", "cq2", "G|H", 1234L, true, false,
new String(val2));
new String(val2, UTF_8));
}

private void verifyColumnUpdate(ColumnUpdate cu, String cf, String cq, String cv, long ts,
boolean timeSet, boolean deleted, String val) {

assertEquals(cf, new String(cu.getColumnFamily()));
assertEquals(cq, new String(cu.getColumnQualifier()));
assertEquals(cv, new String(cu.getColumnVisibility()));
assertEquals(cf, new String(cu.getColumnFamily(), UTF_8));
assertEquals(cq, new String(cu.getColumnQualifier(), UTF_8));
assertEquals(cv, new String(cu.getColumnVisibility(), UTF_8));
assertEquals(timeSet, cu.hasTimestamp());
if (timeSet) {
assertEquals(ts, cu.getTimestamp());
}
assertEquals(deleted, cu.isDeleted());
assertEquals(val, new String(cu.getValue()));
assertEquals(val, new String(cu.getValue(), UTF_8));
}

@Test
Expand All @@ -639,7 +640,7 @@ public void test4() throws Exception {
// after readFields
m2.readFields(dis);

assertEquals("r1", new String(m2.getRow()));
assertEquals("r1", new String(m2.getRow(), UTF_8));
assertEquals(2, m2.getUpdates().size());
assertEquals(2, m2.size());
verifyColumnUpdate(m2.getUpdates().get(0), "cf1", "cq1", "", 0L, false, false, "v1");
Expand Down Expand Up @@ -677,7 +678,7 @@ public void testNewSerialization() throws Exception {
dis.close();

// check it
assertEquals("r1", new String(m2.getRow()));
assertEquals("r1", new String(m2.getRow(), UTF_8));
assertEquals(3, m2.getUpdates().size());
assertEquals(3, m2.size());
verifyColumnUpdate(m2.getUpdates().get(0), "cf1", "cq1", "", 0L, false, false, "v1");
Expand All @@ -686,7 +687,7 @@ public void testNewSerialization() throws Exception {

Mutation m1 = convert(m2);

assertEquals("r1", new String(m1.getRow()));
assertEquals("r1", new String(m1.getRow(), UTF_8));
assertEquals(3, m1.getUpdates().size());
assertEquals(3, m1.size());
verifyColumnUpdate(m1.getUpdates().get(0), "cf1", "cq1", "", 0L, false, false, "v1");
Expand Down Expand Up @@ -749,7 +750,7 @@ public void testReserialize() throws Exception {
DataInputStream dis = new DataInputStream(bis);
m2.readFields(dis);

assertEquals("r1", new String(m1.getRow()));
assertEquals("r1", new String(m1.getRow(), UTF_8));
assertEquals(4, m2.getUpdates().size());
assertEquals(4, m2.size());
verifyColumnUpdate(m2.getUpdates().get(0), "cf1", "cq1", "", 0L, false, false, "v1");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.accumulo.core.file.rfile;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.accumulo.core.file.rfile.GenerateSplits.getEvenlySpacedSplits;
import static org.apache.accumulo.core.file.rfile.GenerateSplits.main;
import static org.apache.accumulo.core.file.rfile.RFileTest.newColFamByteSequence;
Expand Down Expand Up @@ -124,7 +125,7 @@ private void verifySplitsFile(boolean encoded, String... splits) throws IOExcept

private String decode(boolean decode, String string) {
if (decode) {
return new String(Base64.getDecoder().decode(string));
return new String(Base64.getDecoder().decode(string), UTF_8);
}
return string;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ public void test1() throws IOException {

Text d = IndexedDocIterator.parseDocID(k);
assertTrue(docs.contains(d));
assertTrue(new String(v.get()).endsWith(" docID=" + d));
assertTrue(new String(v.get(), UTF_8).endsWith(" docID=" + d));

iter.next();
}
Expand Down Expand Up @@ -251,7 +251,7 @@ public void test2() throws IOException {
Value v = iter.getTopValue();
Text d = IndexedDocIterator.parseDocID(k);
assertTrue(docs.contains(d));
assertTrue(new String(v.get()).endsWith(" docID=" + d));
assertTrue(new String(v.get(), UTF_8).endsWith(" docID=" + d));
iter.next();
}
assertEquals(hitCount, docs.size());
Expand Down Expand Up @@ -296,7 +296,7 @@ public void test3() throws IOException {
Value v = iter.getTopValue();
Text d = IndexedDocIterator.parseDocID(k);
assertTrue(docs.contains(d));
assertTrue(new String(v.get()).endsWith(" docID=" + d));
assertTrue(new String(v.get(), UTF_8).endsWith(" docID=" + d));
iter.next();
}
assertEquals(hitCount, docs.size());
Expand Down Expand Up @@ -339,7 +339,7 @@ public void test4() throws IOException {
Value v = iter.getTopValue();
Text d = IndexedDocIterator.parseDocID(k);
assertTrue(docs.contains(d));
assertTrue(new String(v.get()).endsWith(" docID=" + d));
assertTrue(new String(v.get(), UTF_8).endsWith(" docID=" + d));
iter.next();
}
assertEquals(hitCount, docs.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.accumulo.hadoop.its.mapred;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand Down Expand Up @@ -77,10 +78,10 @@ private static class TestMapper implements Mapper<Key,Value,Key,Value> {
public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter reporter) {
try {
if (key != null) {
assertEquals(key.getRow().toString(), new String(v.get()));
assertEquals(key.getRow().toString(), new String(v.get(), UTF_8));
}
assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
assertEquals(new String(v.get()), String.format("%09x", count));
assertEquals(new String(v.get(), UTF_8), String.format("%09x", count));
} catch (AssertionError e) {
e1 = e;
e1Count++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.accumulo.hadoop.its.mapred;

import static com.google.common.collect.MoreCollectors.onlyElement;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -124,10 +125,10 @@ public void map(Key k, Value v, OutputCollector<Text,Mutation> output, Reporter
finalOutput = output;
try {
if (key != null) {
assertEquals(key.getRow().toString(), new String(v.get()));
assertEquals(key.getRow().toString(), new String(v.get(), UTF_8));
}
assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
assertEquals(new String(v.get()), String.format("%09x", count));
assertEquals(new String(v.get(), UTF_8), String.format("%09x", count));
} catch (AssertionError e) {
e1 = e;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.accumulo.hadoop.its.mapred;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

Expand Down Expand Up @@ -62,10 +63,10 @@ public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter repo
try {
String tableName = ((RangeInputSplit) reporter.getInputSplit()).getTableName();
if (key != null) {
assertEquals(key.getRow().toString(), new String(v.get()));
assertEquals(key.getRow().toString(), new String(v.get(), UTF_8));
}
assertEquals(new Text(String.format("%s_%09x", tableName, count + 1)), k.getRow());
assertEquals(String.format("%s_%09x", tableName, count), new String(v.get()));
assertEquals(String.format("%s_%09x", tableName, count), new String(v.get(), UTF_8));
} catch (AssertionError e) {
e1 = e;
}
Expand Down
Loading

0 comments on commit c53290c

Please sign in to comment.