NetBSD/gnu/usr.bin/rcs/lib/partime.c

/*	$NetBSD: partime.c,v 1.4 1996/10/15 07:00:10 veego Exp $	*/

/* Parse a string, yielding a struct partime that describes it.  */

/* Copyright 1993, 1994, 1995 Paul Eggert
   Distributed under license by the Free Software Foundation, Inc.

This file is part of RCS.

RCS is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

RCS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with RCS; see the file COPYING.
If not, write to the Free Software Foundation,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

Report problems and direct all questions to:

    rcs-bugs@cs.purdue.edu

*/

#if has_conf_h
#	include "conf.h"
#else
#	ifdef __STDC__
#		define P(x) x
#	else
#		define const
#		define P(x) ()
#	endif
#	include <limits.h>
#	include <time.h>
#endif

#include <ctype.h>
#undef isdigit
#define isdigit(c) (((unsigned)(c)-'0') <= 9) /* faster than stock */

#include "partime.h"

char const partimeId[]
  = "Id: partime.c,v 5.13 1995/06/16 06:19:24 eggert Exp";


/* Lookup tables for names of months, weekdays, time zones.  */

#define NAME_LENGTH_MAXIMUM 4

struct name_val {
	char name[NAME_LENGTH_MAXIMUM];
	int val;
};


static char const *parse_decimal P((char const*,int,int,int,int,int*,int*));
static char const *parse_fixed P((char const*,int,int*));
static char const *parse_pattern_letter P((char const*,int,struct partime*));
static char const *parse_prefix P((char const*,struct partime*,int*));
static char const *parse_ranged P((char const*,int,int,int,int*));
static int lookup P((char const*,struct name_val const[]));
static int merge_partime P((struct partime*, struct partime const*));
static void undefine P((struct partime*));


static struct name_val const month_names[] = {
	{"jan",0}, {"feb",1}, {"mar",2}, {"apr",3}, {"may",4}, {"jun",5},
	{"jul",6}, {"aug",7}, {"sep",8}, {"oct",9}, {"nov",10}, {"dec",11},
	{"", TM_UNDEFINED}
};

static struct name_val const weekday_names[] = {
	{"sun",0}, {"mon",1}, {"tue",2}, {"wed",3}, {"thu",4}, {"fri",5}, {"sat",6},
	{"", TM_UNDEFINED}
};

#define hr60nonnegative(t)  ((t)/100 * 60  +  (t)%100)
#define hr60(t)  ((t)<0 ? -hr60nonnegative(-(t)) : hr60nonnegative(t))
#define zs(t,s)  {s, hr60(t)}
#define zd(t,s,d)  zs(t, s),  zs((t)+100, d)

static struct name_val const zone_names[] = {
	zs(-1000, "hst"),		/* Hawaii */
	zd(-1000,"hast","hadt"),/* Hawaii-Aleutian */
	zd(- 900,"akst","akdt"),/* Alaska */
	zd(- 800, "pst", "pdt"),/* Pacific */
	zd(- 700, "mst", "mdt"),/* Mountain */
	zd(- 600, "cst", "cdt"),/* Central */
	zd(- 500, "est", "edt"),/* Eastern */
	zd(- 400, "ast", "adt"),/* Atlantic */
	zd(- 330, "nst", "ndt"),/* Newfoundland */
	zs(  000, "utc"),		/* Coordinated Universal */
	zs(  000, "cut"),		/* " */
	zs(  000,  "ut"),		/* Universal */
	zs(  000,   "z"),		/* Zulu (required by ISO 8601) */
	zd(  000, "gmt", "bst"),/* Greenwich Mean, British Summer */
	zs(  000, "wet"),		/* Western Europe */
	zs(  100, "met"),		/* Middle Europe */
	zs(  100, "cet"),		/* Central Europe */
	zs(  200, "eet"),		/* Eastern Europe */
	zs(  530, "ist"),		/* India */
	zd(  900, "jst", "jdt"),/* Japan */
	zd(  900, "kst", "kdt"),/* Korea */
	zd( 1200,"nzst","nzdt"),/* New Zealand */
	{ "lt", 1 },
#if 0
	/* The following names are duplicates or are not well attested.  */
	zs(-1100, "sst"),		/* Samoa */
	zs(-1000, "tht"),		/* Tahiti */
	zs(- 930, "mqt"),		/* Marquesas */
	zs(- 900, "gbt"),		/* Gambier */
	zd(- 900, "yst", "ydt"),/* Yukon - name is no longer used */
	zs(- 830, "pit"),		/* Pitcairn */
	zd(- 500, "cst", "cdt"),/* Cuba */
	zd(- 500, "ast", "adt"),/* Acre */
	zd(- 400, "wst", "wdt"),/* Western Brazil */
	zd(- 400, "ast", "adt"),/* Andes */
	zd(- 400, "cst", "cdt"),/* Chile */
	zs(- 300, "wgt"),		/* Western Greenland */
	zd(- 300, "est", "edt"),/* Eastern South America */
	zs(- 300, "mgt"),		/* Middle Greenland */
	zd(- 200, "fst", "fdt"),/* Fernando de Noronha */
	zs(- 100, "egt"),		/* Eastern Greenland */
	zs(- 100, "aat"),		/* Atlantic Africa */
	zs(- 100, "act"),		/* Azores and Canaries */
	zs(  000, "wat"),		/* West Africa */
	zs(  100, "cat"),		/* Central Africa */
	zd(  100, "mez","mesz"),/* Mittel-Europaeische Zeit */
	zs(  200, "sat"),		/* South Africa */
	zd(  200, "ist", "idt"),/* Israel */
	zs(  300, "eat"),		/* East Africa */
	zd(  300, "ast", "adt"),/* Arabia */
	zd(  300, "msk", "msd"),/* Moscow */
	zd(  330, "ist", "idt"),/* Iran */
	zs(  400, "gst"),		/* Gulf */
	zs(  400, "smt"),		/* Seychelles & Mascarene */
	zd(  400, "esk", "esd"),/* Yekaterinburg */
	zd(  400, "bsk", "bsd"),/* Baku */
	zs(  430, "aft"),		/* Afghanistan */
	zd(  500, "osk", "osd"),/* Omsk */
	zs(  500, "pkt"),		/* Pakistan */
	zd(  500, "tsk", "tsd"),/* Tashkent */
	zs(  545, "npt"),		/* Nepal */
	zs(  600, "bgt"),		/* Bangladesh */
	zd(  600, "nsk", "nsd"),/* Novosibirsk */
	zs(  630, "bmt"),		/* Burma */
	zs(  630, "cct"),		/* Cocos */
	zs(  700, "ict"),		/* Indochina */
	zs(  700, "jvt"),		/* Java */
	zd(  700, "isk", "isd"),/* Irkutsk */
	zs(  800, "hkt"),		/* Hong Kong */
	zs(  800, "pst"),		/* Philippines */
	zs(  800, "sgt"),		/* Singapore */
	zd(  800, "cst", "cdt"),/* China */
	zd(  800, "ust", "udt"),/* Ulan Bator */
	zd(  800, "wst", "wst"),/* Western Australia */
	zd(  800, "ysk", "ysd"),/* Yakutsk */
	zs(  900, "blt"),		/* Belau */
	zs(  900, "mlt"),		/* Moluccas */
	zd(  900, "vsk", "vsd"),/* Vladivostok */
	zd(  930, "cst", "cst"),/* Central Australia */
	zs( 1000, "gst"),		/* Guam */
	zd( 1000, "gsk", "gsd"),/* Magadan */
	zd( 1000, "est", "est"),/* Eastern Australia */
	zd( 1100,"lhst","lhst"),/* Lord Howe */
	zd( 1100, "psk", "psd"),/* Petropavlovsk-Kamchatski */
	zs( 1100,"ncst"),		/* New Caledonia */
	zs( 1130,"nrft"),		/* Norfolk */
	zd( 1200, "ask", "asd"),/* Anadyr */
	zs( 1245,"nz-chat"),	/* Chatham */
	zs( 1300, "tgt"),		/* Tongatapu */
#endif
	{"", -1}
};

	static int
lookup (s, table)
	char const *s;
	struct name_val const table[];
/* Look for a prefix of S in TABLE, returning val for first matching entry.  */
{
	int j;
	char buf[NAME_LENGTH_MAXIMUM];

	for (j = 0;  j < NAME_LENGTH_MAXIMUM;  j++) {
		unsigned char c = *s++;
		buf[j] = isupper (c) ? tolower (c) : c;
		if (!isalpha (c))
			break;
	}
	for (;  table[0].name[0];  table++)
		for (j = 0;  buf[j] == table[0].name[j];  )
			if (++j == NAME_LENGTH_MAXIMUM  ||  !table[0].name[j])
				goto done;
  done:
	return table[0].val;
}


	static void
undefine (t) struct partime *t;
/* Set *T to ``undefined'' values.  */
{
	t->tm.tm_sec = t->tm.tm_min = t->tm.tm_hour = t->tm.tm_mday = t->tm.tm_mon
		= t->tm.tm_year = t->tm.tm_wday = t->tm.tm_yday
		= t->ymodulus = t->yweek
		= TM_UNDEFINED;
	t->zone = TM_UNDEFINED_ZONE;
}

/*
* Array of patterns to look for in a date string.
* Order is important: we look for the first matching pattern
* whose values do not contradict values that we already know about.
* See `parse_pattern_letter' below for the meaning of the pattern codes.
*/
static char const * const patterns[] = {
	/*
	* These traditional patterns must come first,
	* to prevent an ISO 8601 format from misinterpreting their prefixes.
	*/
	"E_n_y", "x", /* RFC 822 */
	"E_n", "n_E", "n", "t:m:s_A", "t:m_A", "t_A", /* traditional */
	"y/N/D$", /* traditional RCS */

	/* ISO 8601:1988 formats, generalized a bit.  */
	"y-N-D$", "4ND$", "Y-N$",
	"RND$", "-R=N$", "-R$", "--N=D$", "N=DT",
	"--N$", "---D$", "DT",
	"Y-d$", "4d$", "R=d$", "-d$", "dT",
	"y-W-X", "yWX", "y=W",
	"-r-W-X", "r-W-XT", "-rWX", "rWXT", "-W=X", "W=XT", "-W",
	"-w-X", "w-XT", "---X$", "XT", "4$",
	"T",
	"h:m:s$", "hms$", "h:m$", "hm$", "h$", "-m:s$", "-ms$", "-m$", "--s$",
	"Y", "Z",

	0
};

	static char const *
parse_prefix (str, t, pi) char const *str; struct partime *t; int *pi;
/*
* Parse an initial prefix of STR, setting *T accordingly.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
* Start with pattern *PI; if success, set *PI to the next pattern to try.
* Set *PI to -1 if we know there are no more patterns to try;
* if *PI is initially negative, give up immediately.
*/
{
	int i = *pi;
	char const *pat;
	unsigned char c;

	if (i < 0)
		return 0;

	/* Remove initial noise.  */
	while (!isalnum (c = *str)  &&  c != '-'  &&  c != '+') {
		if (!c) {
			undefine (t);
			*pi = -1;
			return str;
		}
		str++;
	}

	/* Try a pattern until one succeeds.  */
	while ((pat = patterns[i++]) != 0) {
		char const *s = str;
		undefine (t);
		do {
			if (!(c = *pat++)) {
				*pi = i;
				return s;
			}
		} while ((s = parse_pattern_letter (s, c, t)) != 0);
	}

	return 0;
}

	static char const *
parse_fixed (s, digits, res) char const *s; int digits, *res;
/*
* Parse an initial prefix of S of length DIGITS; it must be a number.
* Store the parsed number into *RES.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	int n = 0;
	char const *lim = s + digits;
	while (s < lim) {
		unsigned d = *s++ - '0';
		if (9 < d)
			return 0;
		n = 10*n + d;
	}
	*res = n;
	return s;
}

	static char const *
parse_ranged (s, digits, lo, hi, res) char const *s; int digits, lo, hi, *res;
/*
* Parse an initial prefix of S of length DIGITS;
* it must be a number in the range LO through HI.
* Store the parsed number into *RES.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	s = parse_fixed (s, digits, res);
	return  s && lo<=*res && *res<=hi  ?  s  :  0;
}

	static char const *
parse_decimal (s, digits, lo, hi, resolution, res, fres)
	char const *s;
	int digits, lo, hi, resolution, *res, *fres;
/*
* Parse an initial prefix of S of length DIGITS;
* it must be a number in the range LO through HI
* and it may be followed by a fraction that is to be computed using RESOLUTION.
* Store the parsed number into *RES; store the fraction times RESOLUTION,
* rounded to the nearest integer, into *FRES.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	s = parse_fixed (s, digits, res);
	if (s && lo<=*res && *res<=hi) {
		int f = 0;
		if ((s[0]==',' || s[0]=='.')  &&  isdigit ((unsigned char) s[1])) {
			char const *s1 = ++s;
			int num10 = 0, denom10 = 10, product;
			while (isdigit ((unsigned char) *++s))
				denom10 *= 10;
			s = parse_fixed (s1, s - s1, &num10);
			product = num10*resolution;
			f = (product + (denom10>>1)) / denom10;
			f -= f & (product%denom10 == denom10>>1); /* round to even */
			if (f < 0  ||  product/resolution != num10)
				return 0; /* overflow */
		}
		*fres = f;
		return s;
	}
	return 0;
}

	char *
parzone (s, zone) char const *s; long *zone;
/*
* Parse an initial prefix of S; it must denote a time zone.
* Set *ZONE to the number of seconds east of GMT,
* or to TM_LOCAL_ZONE if it is the local time zone.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	char sign;
	int hh, mm, ss;
	int minutesEastOfUTC;
	long offset, z;

	/*
	* The formats are LT, n, n DST, nDST, no, o
	* where n is a time zone name
	* and o is a time zone offset of the form [-+]hh[:mm[:ss]].
	*/
	switch (*s) {
		case '-': case '+':
			z = 0;
			break;

		default:
			minutesEastOfUTC = lookup (s, zone_names);
			if (minutesEastOfUTC == -1)
				return 0;

			/* Don't bother to check rest of spelling.  */
			while (isalpha ((unsigned char) *s))
				s++;

			/* Don't modify LT.  */
			if (minutesEastOfUTC == 1) {
				*zone = TM_LOCAL_ZONE;
				return (char *) s;
			}

			z = minutesEastOfUTC * 60L;

			/* Look for trailing " DST".  */
			if (
				(s[-1]=='T' || s[-1]=='t') &&
				(s[-2]=='S' || s[-2]=='s') &&
				(s[-3]=='D' || s[-3]=='t')
			)
				goto trailing_dst;
			while (isspace ((unsigned char) *s))
				s++;
			if (
				(s[0]=='D' || s[0]=='d') &&
				(s[1]=='S' || s[1]=='s') &&
				(s[2]=='T' || s[2]=='t')
			) {
				s += 3;
			  trailing_dst:
				*zone = z + 60*60;
				return (char *) s;
			}

			switch (*s) {
				case '-': case '+': break;
				default: return (char *) s;
			}
	}
	sign = *s++;

	if (!(s = parse_ranged (s, 2, 0, 23, &hh)))
		return 0;
	mm = ss = 0;
	if (*s == ':')
		s++;
	if (isdigit ((unsigned char) *s)) {
		if (!(s = parse_ranged (s, 2, 0, 59, &mm)))
			return 0;
		if (*s==':' && s[-3]==':' && isdigit ((unsigned char) s[1])) {
			if (!(s = parse_ranged (s + 1, 2, 0, 59, &ss)))
				return 0;
		}
	}
	if (isdigit ((unsigned char) *s))
		return 0;
	offset = (hh*60 + mm)*60L + ss;
	*zone = z + (sign=='-' ? -offset : offset);
	/*
	* ?? Are fractions allowed here?
	* If so, they're not implemented.
	*/
	return (char *) s;
}

	static char const *
parse_pattern_letter (s, c, t) char const *s; int c; struct partime *t;
/*
* Parse an initial prefix of S, matching the pattern whose code is C.
* Set *T accordingly.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	switch (c) {
		case '$': /* The next character must be a non-digit.  */
			if (isdigit ((unsigned char) *s))
				return 0;
			break;

		case '-': case '/': case ':':
			/* These characters stand for themselves.  */
			if (*s++ != c)
				return 0;
			break;

		case '4': /* 4-digit year */
			s = parse_fixed (s, 4, &t->tm.tm_year);
			break;

		case '=': /* optional '-' */
			s  +=  *s == '-';
			break;

		case 'A': /* AM or PM */
			/*
			* This matches the regular expression [AaPp][Mm]?.
			* It must not be followed by a letter or digit;
			* otherwise it would match prefixes of strings like "PST".
			*/
			switch (*s++) {
				case 'A': case 'a':
					if (t->tm.tm_hour == 12)
						t->tm.tm_hour = 0;
					break;

				case 'P': case 'p':
					if (t->tm.tm_hour != 12)
						t->tm.tm_hour += 12;
					break;

				default: return 0;
			}
			switch (*s) {
				case 'M': case 'm': s++; break;
			}
			if (isalnum (*s))
				return 0;
			break;

		case 'D': /* day of month [01-31] */
			s = parse_ranged (s, 2, 1, 31, &t->tm.tm_mday);
			break;

		case 'd': /* day of year [001-366] */
			s = parse_ranged (s, 3, 1, 366, &t->tm.tm_yday);
			t->tm.tm_yday--;
			break;

		case 'E': /* extended day of month [1-9, 01-31] */
			s = parse_ranged (s, (
				isdigit ((unsigned char) s[0]) &&
				isdigit ((unsigned char) s[1])
			) + 1, 1, 31, &t->tm.tm_mday);
			break;

		case 'h': /* hour [00-23 followed by optional fraction] */
			{
				int frac;
				s = parse_decimal (s, 2, 0, 23, 60*60, &t->tm.tm_hour, &frac);
				t->tm.tm_min = frac / 60;
				t->tm.tm_sec = frac % 60;
			}
			break;

		case 'm': /* minute [00-59 followed by optional fraction] */
			s = parse_decimal (s, 2, 0, 59, 60, &t->tm.tm_min, &t->tm.tm_sec);
			break;

		case 'n': /* month name [e.g. "Jan"] */
			if (!TM_DEFINED (t->tm.tm_mon = lookup (s, month_names)))
				return 0;
			/* Don't bother to check rest of spelling.  */
			while (isalpha ((unsigned char) *s))
				s++;
			break;

		case 'N': /* month [01-12] */
			s = parse_ranged (s, 2, 1, 12, &t->tm.tm_mon);
			t->tm.tm_mon--;
			break;

		case 'r': /* year % 10 (remainder in origin-0 decade) [0-9] */
			s = parse_fixed (s, 1, &t->tm.tm_year);
			t->ymodulus = 10;
			break;

		case_R:
		case 'R': /* year % 100 (remainder in origin-0 century) [00-99] */
			s = parse_fixed (s, 2, &t->tm.tm_year);
			t->ymodulus = 100;
			break;

		case 's': /* second [00-60 followed by optional fraction] */
			{
				int frac;
				s = parse_decimal (s, 2, 0, 60, 1, &t->tm.tm_sec, &frac);
				t->tm.tm_sec += frac;
			}
			break;

		case 'T': /* 'T' or 't' */
			switch (*s++) {
				case 'T': case 't': break;
				default: return 0;
			}
			break;

		case 't': /* traditional hour [1-9 or 01-12] */
			s = parse_ranged (s, (
				isdigit ((unsigned char) s[0]) && isdigit ((unsigned char) s[1])
			) + 1, 1, 12, &t->tm.tm_hour);
			break;

		case 'w': /* 'W' or 'w' only (stands for current week) */
			switch (*s++) {
				case 'W': case 'w': break;
				default: return 0;
			}
			break;

		case 'W': /* 'W' or 'w', followed by a week of year [00-53] */
			switch (*s++) {
				case 'W': case 'w': break;
				default: return 0;
			}
			s = parse_ranged (s, 2, 0, 53, &t->yweek);
			break;

		case 'X': /* weekday (1=Mon ... 7=Sun) [1-7] */
			s = parse_ranged (s, 1, 1, 7, &t->tm.tm_wday);
			t->tm.tm_wday--;
			break;

		case 'x': /* weekday name [e.g. "Sun"] */
			if (!TM_DEFINED (t->tm.tm_wday = lookup (s, weekday_names)))
				return 0;
			/* Don't bother to check rest of spelling.  */
			while (isalpha ((unsigned char) *s))
				s++;
			break;

		case 'y': /* either R or Y */
			if (
				isdigit ((unsigned char) s[0]) &&
				isdigit ((unsigned char) s[1]) &&
				!isdigit ((unsigned char) s[2])
			)
				goto case_R;
			/* fall into */
		case 'Y': /* year in full [4 or more digits] */
			{
				int len = 0;
				while (isdigit ((unsigned char) s[len]))
					len++;
				if (len < 4)
					return 0;
				s = parse_fixed (s, len, &t->tm.tm_year);
			}
			break;

		case 'Z': /* time zone */
			s = parzone (s, &t->zone);
			break;

		case '_': /* possibly empty sequence of non-alphanumerics */
			while (!isalnum (*s)  &&  *s)
				s++;
			break;

		default: /* bad pattern */
			return 0;
	}
	return s;
}

	static int
merge_partime (t, u) struct partime *t; struct partime const *u;
/*
* If there is no conflict, merge into *T the additional information in *U
* and return 0.  Otherwise do nothing and return -1.
*/
{
#	define conflict(a,b) ((a) != (b)  &&  TM_DEFINED (a)  &&  TM_DEFINED (b))
	if (
		conflict (t->tm.tm_sec, u->tm.tm_sec) ||
		conflict (t->tm.tm_min, u->tm.tm_min) ||
		conflict (t->tm.tm_hour, u->tm.tm_hour) ||
		conflict (t->tm.tm_mday, u->tm.tm_mday) ||
		conflict (t->tm.tm_mon, u->tm.tm_mon) ||
		conflict (t->tm.tm_year, u->tm.tm_year) ||
		conflict (t->tm.tm_wday, u->tm.tm_yday) ||
		conflict (t->ymodulus, u->ymodulus) ||
		conflict (t->yweek, u->yweek) ||
		(
			t->zone != u->zone &&
			t->zone != TM_UNDEFINED_ZONE &&
			u->zone != TM_UNDEFINED_ZONE
		)
	)
		return -1;
#	undef conflict
#	define merge_(a,b) if (TM_DEFINED (b)) (a) = (b);
	merge_ (t->tm.tm_sec, u->tm.tm_sec)
	merge_ (t->tm.tm_min, u->tm.tm_min)
	merge_ (t->tm.tm_hour, u->tm.tm_hour)
	merge_ (t->tm.tm_mday, u->tm.tm_mday)
	merge_ (t->tm.tm_mon, u->tm.tm_mon)
	merge_ (t->tm.tm_year, u->tm.tm_year)
	merge_ (t->tm.tm_wday, u->tm.tm_yday)
	merge_ (t->ymodulus, u->ymodulus)
	merge_ (t->yweek, u->yweek)
#	undef merge_
	if (u->zone != TM_UNDEFINED_ZONE) t->zone = u->zone;
	return 0;
}

	char *
partime (s, t) char const *s; struct partime *t;
/*
* Parse a date/time prefix of S, putting the parsed result into *T.
* Return the first character after the prefix.
* The prefix may contain no useful information;
* in that case, *T will contain only undefined values.
*/
{
	struct partime p;

	undefine (t);
	while (*s) {
		int i = 0;
		char const *s1;
		do {
			if (!(s1 = parse_prefix (s, &p, &i)))
				return (char *) s;
		} while (merge_partime (t, &p) != 0);
		s = s1;
	}
	return (char *) s;
}