Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@f5ab7c1
Browse files Browse the repository at this point in the history
Merge pull request duckdb/duckdb#13168 from pdet/weird_dates
  • Loading branch information
krlmlr committed Aug 9, 2024
1 parent d09a35d commit ba4ede9
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
unordered_map<idx_t, vector<LogicalType>> &info_sql_types_candidates,
idx_t start_idx_detection) {
const idx_t chunk_size = data_chunk.size();
HasType has_type;
for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
auto &cur_vector = data_chunk.data[col_idx];
D_ASSERT(cur_vector.GetVectorType() == VectorType::FLAT_VECTOR);
Expand All @@ -339,8 +340,8 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
// If Value is not Null, Has a numeric date format, and the current investigated candidate is
// either a timestamp or a date
if (null_mask.RowIsValid(row_idx) && StartsWithNumericDate(separator, vector_data[row_idx]) &&
(col_type_candidates.back().id() == LogicalTypeId::TIMESTAMP ||
col_type_candidates.back().id() == LogicalTypeId::DATE)) {
((col_type_candidates.back().id() == LogicalTypeId::TIMESTAMP && !has_type.timestamp) ||
(col_type_candidates.back().id() == LogicalTypeId::DATE && !has_type.date))) {
DetectDateAndTimeStampFormats(state_machine, sql_type, separator, vector_data[row_idx]);
}
// try cast from string to sql_type
Expand All @@ -364,6 +365,12 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
col_type_candidates.pop_back();
}
}
if (col_type_candidates.back().id() == LogicalTypeId::DATE) {
has_type.date = true;
}
if (col_type_candidates.back().id() == LogicalTypeId::TIMESTAMP) {
has_type.timestamp = true;
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "1-dev4041"
#define DUCKDB_PATCH_VERSION "1-dev4052"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 0
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.0.1-dev4041"
#define DUCKDB_VERSION "v1.0.1-dev4052"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "ad7df1eabc"
#define DUCKDB_SOURCE_ID "f5ab7c167e"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "duckdb.h"
#include "fast_float/fast_float.h"
#include "duckdb/common/string_util.hpp"

namespace duckdb {
template <class T>
Expand Down Expand Up @@ -37,7 +38,7 @@ static bool TryDoubleCast(const char *buf, idx_t len, T &result, bool strict, ch
}
}
auto endptr = buf + len;
auto parse_result = duckdb_fast_float::from_chars(buf, buf + len, result, decimal_separator);
auto parse_result = duckdb_fast_float::from_chars(buf, buf + len, result, strict, decimal_separator);
if (parse_result.ec != std::errc()) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
return false;
}

if (pos != len && buf[pos] == '_') {
if (pos != len && buf[pos] == '_' && !strict) {
// Skip one underscore if it is not the last character and followed by a digit
pos++;
if (pos == len || !StringUtil::CharacterIsDigit(buf[pos])) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct DateTimestampSniffing {
bool initialized = false;
bool had_match = false;
vector<string> format;
idx_t initial_size;
};
//! Struct to store the result of the Sniffer
struct SnifferResult {
Expand Down Expand Up @@ -107,6 +108,12 @@ struct SetColumns {
}
};

//! Pair of flags recording whether a DATE and/or a TIMESTAMP type has already been
//! identified for this CSV file. Both flags start out cleared.
struct HasType {
	//! True once a DATE type has been identified
	bool date {false};
	//! True once a TIMESTAMP type has been identified
	bool timestamp {false};
};

//! Sniffer that detects Header, Dialect and Types of CSV Files
class CSVSniffer {
public:
Expand Down
24 changes: 20 additions & 4 deletions src/duckdb/third_party/fast_float/fast_float/fast_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct from_chars_result {
*/
template<typename T>
from_chars_result from_chars(const char *first, const char *last,
T &value,
T &value, bool strict=false,
const char decimal_separator = '.',
chars_format fmt = chars_format::general) noexcept;

Expand Down Expand Up @@ -504,7 +504,7 @@ struct parsed_number_string {
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
fastfloat_really_inline
parsed_number_string parse_number_string(const char *p, const char *pend, const char decimal_separator, chars_format fmt) noexcept {
parsed_number_string parse_number_string(const char *p, const char *pend, const char decimal_separator, chars_format fmt, bool strict) noexcept {
parsed_number_string answer;
answer.valid = false;
answer.too_many_digits = false;
Expand All @@ -530,6 +530,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, const
uint64_t(*p - '0'); // might overflow, we will handle the overflow later
++p;
if(p != pend && *p == '_') {
if (strict) {
answer.valid = false;
return answer;
}
// skip 1 underscore if it is not the last character and followed by a digit
++p;
if(p == pend || !is_integer(*p)) {
Expand Down Expand Up @@ -565,6 +569,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, const
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok

if(p != pend && *p == '_') {
if (strict) {
answer.valid = false;
return answer;
}
// skip 1 underscore if it is not the last character and followed by a digit
++p;
++skipped_underscores;
Expand Down Expand Up @@ -611,6 +619,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, const
++p;

if(p != pend && *p == '_') {
if (strict) {
answer.valid = false;
return answer;
}
// skip 1 underscore if it is not the last character and followed by a digit
++p;
if(p == pend || !is_integer(*p)) {
Expand Down Expand Up @@ -659,6 +671,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, const
++p;

if(p != pend && *p == '_') {
if (strict) {
answer.valid = false;
return answer;
}
// skip 1 underscore if it is not the last character and followed by a digit
++p;
if(p == pend || !is_integer(*p)) {
Expand Down Expand Up @@ -2445,7 +2461,7 @@ fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &va

template<typename T>
from_chars_result from_chars(const char *first, const char *last,
T &value, const char decimal_separator, chars_format fmt
T &value, bool strict, const char decimal_separator, chars_format fmt
/*= chars_format::general*/) noexcept {
static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");

Expand All @@ -2456,7 +2472,7 @@ from_chars_result from_chars(const char *first, const char *last,
answer.ptr = first;
return answer;
}
parsed_number_string pns = parse_number_string(first, last, decimal_separator, fmt);
parsed_number_string pns = parse_number_string(first, last, decimal_separator, fmt, strict);
if (!pns.valid) {
return detail::parse_infnan(first, last, value);
}
Expand Down

0 comments on commit ba4ede9

Please sign in to comment.