diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/DomainNameReader.java b/url-detector/src/main/java/com/linkedin/urls/detection/DomainNameReader.java index de1b97f..415dd82 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/DomainNameReader.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/DomainNameReader.java @@ -102,7 +102,11 @@ public enum ReaderNextState { /** * Finished reading, next step should be to read the query string. */ - ReadQueryString + ReadQueryString, + /** + * The text read may be a username/password rather than a domain, next step should be to read the username and password. + */ + ReadUserPass } /** @@ -332,6 +336,10 @@ public ReaderNextState readDomainName() { } else if (curr == '#') { //continue by reading the fragment return checkDomainNameValid(ReaderNextState.ReadFragment, curr); + } else if (curr == '@') { + //this may not have been a domain after all, but rather a username/password instead + _reader.goBack(); + return ReaderNextState.ReadUserPass; } else if (CharUtils.isDot(curr) || (curr == '%' && _reader.canReadChars(2) && _reader.peek(2).equalsIgnoreCase(HEX_ENCODED_DOT))) { //if the current character is a dot or a urlEncodedDot diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 1b6bc10..6afccb6 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -553,6 +553,10 @@ public void addCharacter(char character) { return readPort(); case ReadQueryString: return readQueryString(); + case ReadUserPass: + int host = _currentUrlMarker.indexOf(UrlPart.HOST); + _currentUrlMarker.unsetIndex(UrlPart.HOST); + return readUserPass(host); default: return readEnd(ReadEndState.InvalidUrl); } diff --git a/url-detector/src/test/java/com/linkedin/urls/TestUrl.java b/url-detector/src/test/java/com/linkedin/urls/TestUrl.java index a8570d2..1e5bc05 100644 --- 
a/url-detector/src/test/java/com/linkedin/urls/TestUrl.java +++ b/url-detector/src/test/java/com/linkedin/urls/TestUrl.java @@ -28,7 +28,8 @@ private Object[][] getUsernamePasswordUrls() { {"@www.google.com", "www.google.com", "/", "", ""}, {"lalal:@www.gogo.com", "www.gogo.com", "/", "lalal", ""}, {"nono:boo@[::1]", "[::1]", "/", "nono", "boo"}, - {"nono:boo@yahoo.com/@1234", "yahoo.com", "/@1234", "nono", "boo"} + {"nono:boo@yahoo.com/@1234", "yahoo.com", "/@1234", "nono", "boo"}, + {"big.big.boss@google.com", "google.com", "/", "big.big.boss", ""} }; } diff --git a/url-detector/src/test/java/com/linkedin/urls/detection/TestUriDetection.java b/url-detector/src/test/java/com/linkedin/urls/detection/TestUriDetection.java index fe72fe9..3efdd83 100644 --- a/url-detector/src/test/java/com/linkedin/urls/detection/TestUriDetection.java +++ b/url-detector/src/test/java/com/linkedin/urls/detection/TestUriDetection.java @@ -657,6 +657,18 @@ public void testIssue12() { runTest("http://user:pass@host.com host.com", UrlDetectorOptions.Default, "http://user:pass@host.com", "host.com"); } + /* + * https://github.com/linkedin/URL-Detector/issues/13 + */ + @Test + public void testIssue13() { + runTest("user@github.io/page", UrlDetectorOptions.Default, "user@github.io/page"); + runTest("name@gmail.com", UrlDetectorOptions.Default, "name@gmail.com"); + runTest("name.lastname@gmail.com", UrlDetectorOptions.Default, "name.lastname@gmail.com"); + runTest("gmail.com@gmail.com", UrlDetectorOptions.Default, "gmail.com@gmail.com"); + runTest("first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com", UrlDetectorOptions.Default, "first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com"); + } + /* * https://github.com/linkedin/URL-Detector/issues/16 */