From 5b3c5953553bb9fb0b171abc6041e7c7e9ca5b4d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 16 Mar 2023 14:18:28 -0400 Subject: [PATCH] Tighten error checks in datetime input, and remove bogus "ISO" format. DecodeDateTime and DecodeTimeOnly had support for date input in the style "Y2023M03D16", which the comments claimed to be an "ISO" format. However, so far as I can find there is no such format in ISO 8601; they write units before numbers in intervals, but not in datetimes. Furthermore, the lesser-known ISO 8601-2 spec actually defines an incompatible format "2023Y03M16D". None of our documentation mentions such a format either. So let's just drop it. That leaves us with only two cases for a prefix unit specifier in datetimes: Julian dates written as Jnnnn, and the "T" separator defined by ISO 8601. Add checks to catch misuse of these specifiers, that is consecutive specifiers or a dangling specifier at the end of the string. We do not however disallow a specifier that is separated from the field that it disambiguates (by noise words or unrelated fields). That being the case, remove some overly-aggressive error checks from the ISOTIME cases. Joseph Koshakow, editorialized a bit by me; thanks also to Peter Eisentraut for some standards-reading. Discussion: https://postgr.es/m/CAAvxfHf2Q1gKLiHGnuPOiyf0ASvKUM4BnMfsXuwgtYEb_Gx0Zw@mail.gmail.com --- src/backend/utils/adt/datetime.c | 221 ++++--------------------- src/test/regress/expected/horology.out | 72 +++++--- src/test/regress/sql/horology.sql | 19 ++- 3 files changed, 92 insertions(+), 220 deletions(-) diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index a7558d39a0..516ee9c154 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf, int fmask = 0, tmask, type; - int ptype = 0; /* "prefix type" for ISO y2001m02d04 format */ + int ptype = 0; /* "prefix type" for ISO and Julian formats */ int i; int val; int dterr; @@ -1071,6 +1071,9 @@ DecodeDateTime(char **field, int *ftype, int nf, { char *cp; + /* + * Allow a preceding "t" field, but no other units. + */ if (ptype != 0) { /* Sanity check; should not fail this test */ @@ -1175,8 +1178,7 @@ DecodeDateTime(char **field, int *ftype, int nf, case DTK_NUMBER: /* - * Was this an "ISO date" with embedded field labels? An - * example is "y2001m02d04" - thomas 2001-02-04 + * Deal with cases where previous field labeled this one */ if (ptype != 0) { @@ -1187,85 +1189,11 @@ DecodeDateTime(char **field, int *ftype, int nf, value = strtoint(field[i], &cp, 10); if (errno == ERANGE) return DTERR_FIELD_OVERFLOW; - - /* - * only a few kinds are allowed to have an embedded - * decimal - */ - if (*cp == '.') - switch (ptype) - { - case DTK_JULIAN: - case DTK_TIME: - case DTK_SECOND: - break; - default: - return DTERR_BAD_FORMAT; - break; - } - else if (*cp != '\0') + if (*cp != '.' && *cp != '\0') return DTERR_BAD_FORMAT; switch (ptype) { - case DTK_YEAR: - tm->tm_year = value; - tmask = DTK_M(YEAR); - break; - - case DTK_MONTH: - - /* - * already have a month and hour? then assume - * minutes - */ - if ((fmask & DTK_M(MONTH)) != 0 && - (fmask & DTK_M(HOUR)) != 0) - { - tm->tm_min = value; - tmask = DTK_M(MINUTE); - } - else - { - tm->tm_mon = value; - tmask = DTK_M(MONTH); - } - break; - - case DTK_DAY: - tm->tm_mday = value; - tmask = DTK_M(DAY); - break; - - case DTK_HOUR: - tm->tm_hour = value; - tmask = DTK_M(HOUR); - break; - - case DTK_MINUTE: - tm->tm_min = value; - tmask = DTK_M(MINUTE); - break; - - case DTK_SECOND: - tm->tm_sec = value; - tmask = DTK_M(SECOND); - if (*cp == '.') - { - dterr = ParseFractionalSecond(cp, fsec); - if (dterr) - return dterr; - tmask = DTK_ALL_SECS_M; - } - break; - - case DTK_TZ: - tmask = DTK_M(TZ); - dterr = DecodeTimezone(field[i], tzp); - if (dterr) - return dterr; - break; - case DTK_JULIAN: /* previous field was a label for "julian date" */ if (value < 0) @@ -1519,6 +1447,9 @@ DecodeDateTime(char **field, int *ftype, int nf, case UNITS: tmask = 0; + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; ptype = val; break; @@ -1534,18 +1465,9 @@ DecodeDateTime(char **field, int *ftype, int nf, if ((fmask & DTK_DATE_M) != DTK_DATE_M) return DTERR_BAD_FORMAT; - /*** - * We will need one of the following fields: - * DTK_NUMBER should be hhmmss.fff - * DTK_TIME should be hh:mm:ss.fff - * DTK_DATE should be hhmmss-zz - ***/ - if (i >= nf - 1 || - (ftype[i + 1] != DTK_NUMBER && - ftype[i + 1] != DTK_TIME && - ftype[i + 1] != DTK_DATE)) + /* reject consecutive unhandled units */ + if (ptype != 0) return DTERR_BAD_FORMAT; - ptype = val; break; @@ -1576,6 +1498,10 @@ DecodeDateTime(char **field, int *ftype, int nf, fmask |= tmask; } /* end loop over fields */ + /* reject if prefix type appeared and was never handled */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + /* do additional checking for normal date specs (but not "infinity" etc) */ if (*dtype == DTK_DATE) { @@ -1943,7 +1869,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf, int fmask = 0, tmask, type; - int ptype = 0; /* "prefix type" for ISO h04mm05s06 format */ + int ptype = 0; /* "prefix type" for ISO and Julian formats */ int i; int val; int dterr; @@ -2070,112 +1996,26 @@ DecodeTimeOnly(char **field, int *ftype, int nf, case DTK_NUMBER: /* - * Was this an "ISO time" with embedded field labels? An - * example is "h04mm05s06" - thomas 2001-02-04 + * Deal with cases where previous field labeled this one */ if (ptype != 0) { char *cp; int value; - /* Only accept a date under limited circumstances */ - switch (ptype) - { - case DTK_JULIAN: - case DTK_YEAR: - case DTK_MONTH: - case DTK_DAY: - if (tzp == NULL) - return DTERR_BAD_FORMAT; - default: - break; - } - errno = 0; value = strtoint(field[i], &cp, 10); if (errno == ERANGE) return DTERR_FIELD_OVERFLOW; - - /* - * only a few kinds are allowed to have an embedded - * decimal - */ - if (*cp == '.') - switch (ptype) - { - case DTK_JULIAN: - case DTK_TIME: - case DTK_SECOND: - break; - default: - return DTERR_BAD_FORMAT; - break; - } - else if (*cp != '\0') + if (*cp != '.' && *cp != '\0') return DTERR_BAD_FORMAT; switch (ptype) { - case DTK_YEAR: - tm->tm_year = value; - tmask = DTK_M(YEAR); - break; - - case DTK_MONTH: - - /* - * already have a month and hour? then assume - * minutes - */ - if ((fmask & DTK_M(MONTH)) != 0 && - (fmask & DTK_M(HOUR)) != 0) - { - tm->tm_min = value; - tmask = DTK_M(MINUTE); - } - else - { - tm->tm_mon = value; - tmask = DTK_M(MONTH); - } - break; - - case DTK_DAY: - tm->tm_mday = value; - tmask = DTK_M(DAY); - break; - - case DTK_HOUR: - tm->tm_hour = value; - tmask = DTK_M(HOUR); - break; - - case DTK_MINUTE: - tm->tm_min = value; - tmask = DTK_M(MINUTE); - break; - - case DTK_SECOND: - tm->tm_sec = value; - tmask = DTK_M(SECOND); - if (*cp == '.') - { - dterr = ParseFractionalSecond(cp, fsec); - if (dterr) - return dterr; - tmask = DTK_ALL_SECS_M; - } - break; - - case DTK_TZ: - tmask = DTK_M(TZ); - dterr = DecodeTimezone(field[i], tzp); - if (dterr) - return dterr; - break; - case DTK_JULIAN: /* previous field was a label for "julian date" */ + if (tzp == NULL) + return DTERR_BAD_FORMAT; if (value < 0) return DTERR_FIELD_OVERFLOW; tmask = DTK_DATE_M; @@ -2378,24 +2218,17 @@ DecodeTimeOnly(char **field, int *ftype, int nf, case UNITS: tmask = 0; + /* reject consecutive unhandled units */ + if (ptype != 0) + return DTERR_BAD_FORMAT; ptype = val; break; case ISOTIME: tmask = 0; - - /*** - * We will need one of the following fields: - * DTK_NUMBER should be hhmmss.fff - * DTK_TIME should be hh:mm:ss.fff - * DTK_DATE should be hhmmss-zz - ***/ - if (i >= nf - 1 || - (ftype[i + 1] != DTK_NUMBER && - ftype[i + 1] != DTK_TIME && - ftype[i + 1] != DTK_DATE)) + /* reject consecutive unhandled units */ + if (ptype != 0) return DTERR_BAD_FORMAT; - ptype = val; break; @@ -2426,6 +2259,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf, fmask |= tmask; } /* end loop over fields */ + /* reject if prefix type appeared and was never handled */ + if (ptype != 0) + return DTERR_BAD_FORMAT; + /* do final checking/adjustment of Y/M/D fields */ dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm); if (dterr) diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 4f01131077..e63e5b30fe 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -83,6 +83,18 @@ SELECT timestamp with time zone '12/27/2001 04:05:06.789-08'; Thu Dec 27 04:05:06.789 2001 PST (1 row) +SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST'; + timestamptz +---------------------------------- + Wed Dec 26 18:05:06.789 2001 PST +(1 row) + +SELECT timestamp with time zone '2001-12-27 allballs'; + timestamptz +------------------------------ + Wed Dec 26 16:00:00 2001 PST +(1 row) + -- should fail in mdy mode: SELECT timestamp with time zone '27/12/2001 04:05:06.789-08'; ERROR: date/time field value out of range: "27/12/2001 04:05:06.789-08" @@ -97,30 +109,6 @@ SELECT timestamp with time zone '27/12/2001 04:05:06.789-08'; (1 row) reset datestyle; -SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; - timestamptz ----------------------------------- - Wed Dec 26 12:05:06.789 2001 PST -(1 row) - -SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08'; - timestamptz ----------------------------------- - Thu Dec 27 04:05:06.789 2001 PST -(1 row) - -SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08'; - timestamptz ----------------------------------- - Wed Dec 26 12:05:06.789 2001 PST -(1 row) - -SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; - timestamptz ----------------------------------- - Thu Dec 27 04:05:06.789 2001 PST -(1 row) - SELECT timestamp with time zone 'J2452271+08'; timestamptz ------------------------------ @@ -269,6 +257,23 @@ SELECT time with time zone 'T040506.789 -08'; 04:05:06.789-08 (1 row) +-- time with time zone should accept a date for DST resolution purposes +SELECT time with time zone 'T040506.789 America/Los_Angeles'; +ERROR: invalid input syntax for type time with time zone: "T040506.789 America/Los_Angeles" +LINE 1: SELECT time with time zone 'T040506.789 America/Los_Angeles'... + ^ +SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles'; + timetz +----------------- + 04:05:06.789-08 +(1 row) + +SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles'; + timetz +----------------- + 04:05:06.789-08 +(1 row) + SET DateStyle = 'Postgres, MDY'; -- Check Julian dates BC SELECT date 'J1520447' AS "Confucius' Birthday"; @@ -283,6 +288,25 @@ SELECT date 'J0' AS "Julian Epoch"; 11-24-4714 BC (1 row) +-- test error on dangling Julian units +SELECT date '1995-08-06 J J J'; +ERROR: invalid input syntax for type date: "1995-08-06 J J J" +LINE 1: SELECT date '1995-08-06 J J J'; + ^ +SELECT date 'J J 1520447'; +ERROR: invalid input syntax for type date: "J J 1520447" +LINE 1: SELECT date 'J J 1520447'; + ^ +-- We used to accept this input style, but it was based on a misreading +-- of ISO8601, and it was never documented anyway +SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; +ERROR: invalid input syntax for type timestamp with time zone: "Y2001M12D27H04M05S06.789+08" +LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08... + ^ +SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; +ERROR: invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08" +LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0... + ^ -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; ERROR: invalid input syntax for type date: "1995-08-06 epoch" diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index 0676cac5d1..f7f8c8d2dd 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -20,15 +20,13 @@ SELECT timestamp with time zone '2001-12-27 04:05:06.789-08'; SELECT timestamp with time zone '2001.12.27 04:05:06.789-08'; SELECT timestamp with time zone '2001/12/27 04:05:06.789-08'; SELECT timestamp with time zone '12/27/2001 04:05:06.789-08'; +SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST'; +SELECT timestamp with time zone '2001-12-27 allballs'; -- should fail in mdy mode: SELECT timestamp with time zone '27/12/2001 04:05:06.789-08'; set datestyle to dmy; SELECT timestamp with time zone '27/12/2001 04:05:06.789-08'; reset datestyle; -SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; -SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08'; -SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08'; -SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; SELECT timestamp with time zone 'J2452271+08'; SELECT timestamp with time zone 'J2452271-08'; SELECT timestamp with time zone 'J2452271.5+08'; @@ -57,11 +55,24 @@ SELECT time with time zone 'T040506.789+08'; SELECT time with time zone 'T040506.789-08'; SELECT time with time zone 'T040506.789 +08'; SELECT time with time zone 'T040506.789 -08'; +-- time with time zone should accept a date for DST resolution purposes +SELECT time with time zone 'T040506.789 America/Los_Angeles'; +SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles'; +SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles'; SET DateStyle = 'Postgres, MDY'; -- Check Julian dates BC SELECT date 'J1520447' AS "Confucius' Birthday"; SELECT date 'J0' AS "Julian Epoch"; +-- test error on dangling Julian units +SELECT date '1995-08-06 J J J'; +SELECT date 'J J 1520447'; + +-- We used to accept this input style, but it was based on a misreading +-- of ISO8601, and it was never documented anyway +SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; +SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; + -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; SELECT date '1995-08-06 infinity';