261 lines
5.7 KiB
C
261 lines
5.7 KiB
C
|
|
/********************************************
split.c
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
|
|
|
|
/* $Log: split.c,v $
 * Revision 1.1.1.1 1993/03/21 09:45:37 cgd
 * initial import of 386bsd-0.1 sources
 *
 * Revision 5.1 91/12/05 07:56:31 brennan
 * 1.1 pre-release
 *
 */
|
|
|
|
/* split.c */
|
|
|
|
#define TEMPBUFF_GOES_HERE
|
|
|
|
#include "mawk.h"
|
|
#include "symtype.h"
|
|
#include "bi_vars.h"
|
|
#include "bi_funct.h"
|
|
#include "memory.h"
|
|
#include "scan.h"
|
|
#include "regexp.h"
|
|
#include "field.h"
|
|
|
|
SPLIT_OV *split_ov_list ;
|
|
|
|
static int PROTO(re_ov_split, (char *, PTR) ) ;
|
|
static int PROTO(space_ov_split, (char *, char *) ) ;
|
|
|
|
/* split string s of length slen on SPACE without changing s.
|
|
load the pieces into STRINGS and ptrs into
|
|
split_buff[]
|
|
return the number of pieces */
|
|
|
|
int space_split( s , slen)
|
|
register char *s ;
|
|
unsigned slen ;
|
|
{ char *back = s + slen ;
|
|
int i = 0 ;
|
|
int len ;
|
|
char *q ;
|
|
STRING *sval ;
|
|
int lcnt = MAX_SPLIT/3 ;
|
|
|
|
#define EAT_SPACE() while ( scan_code[*(unsigned char*)s] ==\
|
|
SC_SPACE ) s++
|
|
#define EAT_NON_SPACE() \
|
|
*back = ' ' ; /* sentinel */\
|
|
while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\
|
|
*back = 0
|
|
|
|
|
|
while ( lcnt-- )
|
|
{
|
|
EAT_SPACE() ;
|
|
if ( *s == 0 ) goto done ;
|
|
/* mark the front with q */
|
|
q = s++ ;
|
|
EAT_NON_SPACE() ;
|
|
sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
|
|
(void) memcpy(sval->str, q, SIZE_T(len)) ;
|
|
|
|
EAT_SPACE() ;
|
|
if ( *s == 0 ) goto done ;
|
|
q = s++ ;
|
|
EAT_NON_SPACE() ;
|
|
sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
|
|
(void) memcpy(sval->str, q, SIZE_T(len)) ;
|
|
|
|
EAT_SPACE() ;
|
|
if ( *s == 0 ) goto done ;
|
|
q = s++ ;
|
|
EAT_NON_SPACE() ;
|
|
sval = split_buff[i++] = new_STRING((char *) 0, len = s - q ) ;
|
|
(void) memcpy(sval->str, q, SIZE_T(len)) ;
|
|
|
|
}
|
|
/* we've overflowed */
|
|
return i + space_ov_split(s, back) ;
|
|
|
|
done:
|
|
return i ;
|
|
}
|
|
|
|
static int space_ov_split(s, back)
|
|
register char *s ;
|
|
char *back ;
|
|
|
|
{
|
|
SPLIT_OV dummy ;
|
|
register SPLIT_OV *tail = &dummy ;
|
|
char *q ;
|
|
int cnt = 0 ;
|
|
unsigned len ;
|
|
|
|
while ( 1 )
|
|
{
|
|
EAT_SPACE() ;
|
|
if ( *s == 0 ) break ; /* done */
|
|
q = s++ ;
|
|
EAT_NON_SPACE() ;
|
|
|
|
tail = tail->link = (SPLIT_OV*) zmalloc(sizeof(SPLIT_OV)) ;
|
|
tail->sval = new_STRING((char *) 0 , len = s-q) ;
|
|
(void) memcpy(tail->sval->str, q, SIZE_T(len)) ;
|
|
cnt++ ;
|
|
}
|
|
|
|
tail->link = (SPLIT_OV*) 0 ;
|
|
split_ov_list = dummy.link ;
|
|
return cnt ;
|
|
}
|
|
|
|
|
|
char *re_pos_match(s, re, lenp)
|
|
register char *s ;
|
|
PTR re ; unsigned *lenp ;
|
|
{
|
|
while ( s = REmatch(s, re, lenp) )
|
|
if ( *lenp ) return s ;
|
|
else
|
|
if ( *s == 0 ) break ;
|
|
else s++ ;
|
|
|
|
return (char *) 0 ;
|
|
}
|
|
|
|
int re_split(s, re)
|
|
char *s ;
|
|
PTR re ;
|
|
{ register char *t ;
|
|
int i = 0 ;
|
|
unsigned mlen, len ;
|
|
STRING *sval ;
|
|
int lcnt = MAX_SPLIT / 3 ;
|
|
|
|
while ( lcnt-- )
|
|
{
|
|
if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
|
|
sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
|
|
(void) memcpy(sval->str, s, SIZE_T(len)) ;
|
|
s = t + mlen ;
|
|
|
|
if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
|
|
sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
|
|
(void) memcpy(sval->str, s, SIZE_T(len)) ;
|
|
s = t + mlen ;
|
|
|
|
if ( !(t = re_pos_match(s, re, &mlen)) ) goto done ;
|
|
sval = split_buff[i++] = new_STRING( (char *)0, len = t-s) ;
|
|
(void) memcpy(sval->str, s, SIZE_T(len)) ;
|
|
s = t + mlen ;
|
|
}
|
|
/* we've overflowed */
|
|
return i + re_ov_split(s, re) ;
|
|
|
|
done:
|
|
split_buff[i++] = new_STRING(s) ;
|
|
return i ;
|
|
}
|
|
|
|
/*
|
|
we've overflowed split_buff[] , put
|
|
the rest on the split_ov_list
|
|
return number of pieces
|
|
*/
|
|
|
|
static int re_ov_split(s, re)
|
|
char *s ;
|
|
PTR re ;
|
|
{
|
|
SPLIT_OV dummy ;
|
|
register SPLIT_OV *tail = &dummy ;
|
|
int cnt = 1 ;
|
|
char *t ;
|
|
unsigned len, mlen ;
|
|
|
|
while ( t = re_pos_match(s, re, &mlen) )
|
|
{
|
|
tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ;
|
|
tail->sval = new_STRING( (char *)0, len = t-s) ;
|
|
(void) memcpy(tail->sval->str, s, SIZE_T(len)) ;
|
|
s = t + mlen ;
|
|
cnt++ ;
|
|
}
|
|
/* and one more */
|
|
tail = tail->link = (SPLIT_OV *) zmalloc(sizeof(SPLIT_OV)) ;
|
|
tail->sval = new_STRING(s) ;
|
|
tail->link = (SPLIT_OV*) 0 ;
|
|
split_ov_list = dummy.link ;
|
|
|
|
return cnt ;
|
|
}
|
|
|
|
/* split(s, X, r)
|
|
split s into array X on r
|
|
|
|
entry: sp[0] holds r
|
|
sp[-1] pts at X
|
|
sp[-2] holds s
|
|
*/
|
|
CELL *bi_split(sp)
|
|
register CELL *sp ;
|
|
{
|
|
int cnt ; /* the number of pieces */
|
|
|
|
|
|
if ( sp->type < C_RE ) cast_for_split(sp) ;
|
|
/* can be C_RE, C_SPACE or C_SNULL */
|
|
sp -= 2 ;
|
|
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
|
|
|
|
if ( string(sp)->len == 0 ) /* nothing to split */
|
|
{ free_STRING( string(sp) ) ;
|
|
sp->type = C_DOUBLE ; sp->dval = 0.0 ;
|
|
return sp ;
|
|
}
|
|
|
|
switch ( (sp+2)->type )
|
|
{
|
|
case C_RE :
|
|
cnt = re_split(string(sp)->str, (sp+2)->ptr) ;
|
|
break ;
|
|
|
|
case C_SPACE :
|
|
cnt = space_split(string(sp)->str, string(sp)->len) ;
|
|
break ;
|
|
|
|
/* this case could be done by C_RE, but very slowly.
|
|
Since it is the common way to eliminate fields,
|
|
we'll treat the special case for speed */
|
|
case C_SNULL : /* split on empty string */
|
|
cnt = 1 ;
|
|
split_buff[0] = (STRING *) sp->ptr ;
|
|
string(sp)->ref_cnt++ ;
|
|
break ;
|
|
|
|
default : bozo("bad splitting cell in bi_split") ;
|
|
}
|
|
|
|
|
|
free_STRING( string(sp) ) ;
|
|
sp->type = C_DOUBLE ;
|
|
sp->dval = (double) cnt ;
|
|
|
|
load_array((ARRAY)(sp+1)->ptr, cnt) ;
|
|
|
|
return sp ;
|
|
}
|
|
|