/******************************************** fin.c copyright 1991, 1992. Michael D. Brennan This is a source file for mawk, an implementation of the AWK programming language. Mawk is distributed without warranty under the terms of the GNU General Public License, version 2, 1991. ********************************************/ /*$Log: fin.c,v $ /*Revision 1.2 1993/07/02 23:57:23 jtc /*Updated to mawk 1.1.4 /* * Revision 5.6 1992/12/17 02:48:01 mike * 1.1.2d changes for DOS * * Revision 5.5 1992/07/28 15:11:30 brennan * minor change in finding eol, needed for MsDOS * * Revision 5.4 1992/07/10 16:17:10 brennan * MsDOS: remove NO_BINMODE macro * * Revision 5.3 1992/07/08 16:14:27 brennan * FILENAME and FNR retain last values in the * END block. * * Revision 5.2 1992/02/21 13:30:08 brennan * fixed bug that free'd FILENAME twice if * command line was var=value only * * Revision 5.1 91/12/05 07:56:02 brennan * 1.1 pre-release * */ /* fin.c */ #include "mawk.h" #include "fin.h" #include "memory.h" #include "bi_vars.h" #include "field.h" #include "symtype.h" #include "scan.h" #if HAVE_FCNTL_H #include #endif /* This file handles input files. Opening, closing, buffering and (most important) splitting files into records, FINgets(). */ #ifndef MSDOS_MSC extern int errno ; #endif int PROTO(isatty, (int) ) ; static FIN *PROTO( next_main, (int) ) ; static char *PROTO( enlarge_fin_buffer, (FIN *) ) ; static void PROTO(set_main_to_stdin, (void) ) ; int PROTO( is_cmdline_assign, (char *) ) ; /* also used by init */ FIN *FINdopen( fd, main_flag ) int fd , main_flag ; { register FIN *fin = (FIN *) zmalloc( sizeof(FIN) ) ; fin->fd = fd ; fin->flags = main_flag ? (MAIN_FLAG|START_FLAG) : START_FLAG ; fin->buffp = fin->buff = (char *) zmalloc(BUFFSZ+1) ; fin->nbuffs = 1 ; fin->buff[0] = 0 ; if ( isatty(fd) && rs_shadow.type == SEP_CHAR && rs_shadow.c == '\n' ) { /* interactive, i.e., line buffer this file */ if ( fd == 0 ) fin->fp = stdin ; else if ( !(fin->fp = fdopen(fd, "r")) ) { errmsg(errno, "fdopen failed") ; exit(1) ; } } else fin->fp = (FILE *) 0 ; return fin ; } FIN *FINopen( filename, main_flag ) char *filename ; int main_flag ; { int fd ; int oflag = O_RDONLY ; #if MSDOS int bm = binmode() & 1 ; if ( bm ) oflag |= O_BINARY ; #endif if ( filename[0] == '-' && filename[1] == 0 ) { #if MSDOS if ( bm ) setmode(0, O_BINARY) ; #endif return FINdopen(0, main_flag) ; } #ifdef THINK_C if ( (fd = open( filename , oflag )) == -1 ) #else if ( (fd = open( filename , oflag, 0 )) == -1 ) #endif { errmsg( errno, "cannot open %s" , filename ) ; return (FIN *) 0 ; } return FINdopen( fd, main_flag ) ; } /* frees the buffer and fd, but leaves FIN structure until the user calls close() */ void FINsemi_close(fin) register FIN *fin ; { static char dead = 0 ; if ( fin->buff != &dead ) { zfree(fin->buff, fin->nbuffs*BUFFSZ + 1) ; if ( fin->fd ) if ( fin->fp ) (void) fclose(fin->fp) ; else (void) close(fin->fd) ; fin->buff = fin->buffp = &dead ; /* marks it semi_closed */ } /* else was already semi_closed */ } /* user called close() on input file */ void FINclose( fin ) FIN *fin ; { FINsemi_close(fin) ; zfree( fin , sizeof(FIN) ) ; } /* return one input record as determined by RS, from input file (FIN) fin */ char *FINgets( fin, len_p ) FIN *fin ; unsigned *len_p ; { register char *p, *q ; unsigned match_len ; unsigned r ; restart : if ( ! (p = fin->buffp)[0] ) /* need a refill */ { if ( fin->flags & EOF_FLAG ) if ( fin->flags & MAIN_FLAG ) { fin = next_main(0) ; goto restart ; } else { *len_p = 0 ; return (char *) 0 ; } if ( fin->fp ) /* line buffering */ if ( ! fgets(fin->buff, BUFFSZ+1, fin->fp) ) { fin->flags |= EOF_FLAG ; fin->buff[0] = 0 ; fin->buffp = fin->buff ; goto restart ; /* might be main_fin */ } else /* return this line */ { /* find eol */ p = fin->buff ; while ( *p != '\n' && *p != 0 ) p++ ; *p = 0 ; *len_p = p - fin->buff ; fin->buffp = p ; #ifdef THINK_C /* * I still don't understand why this is needed, unless fgets() * also does this conversion internally for no good reason. :-( */ for ( p = fin->buff ; *p ; ++p ) { if (*p == '\n') *p = '\r'; else if (*p == '\r') *p = '\n'; } #endif return fin->buff ; } else /* block buffering */ { if ( (r = fillbuff(fin->fd, fin->buff, fin->nbuffs*BUFFSZ)) == 0 ) { fin->flags |= EOF_FLAG ; fin->buffp = fin->buff ; goto restart ; /* might be main */ } else if ( r < fin->nbuffs*BUFFSZ ) fin->flags |= EOF_FLAG ; p = fin->buffp = fin->buff ; if ( fin->flags & START_FLAG ) { fin->flags &= ~START_FLAG ; if ( rs_shadow.type == SEP_MLR ) { /* trim blank lines from front of file */ while ( *p == '\n' ) p++ ; fin->buffp = p ; if ( *p == 0 ) goto restart ; } } } } retry: switch( rs_shadow.type ) { case SEP_CHAR : q = strchr(p, rs_shadow.c) ; match_len = 1 ; break ; case SEP_STR : q = str_str(p, ((STRING *) rs_shadow.ptr)->str, match_len = ((STRING *) rs_shadow.ptr)->len ) ; break ; case SEP_MLR : case SEP_RE : q = re_pos_match(p, rs_shadow.ptr, &match_len) ; /* if the match is at the end, there might still be more to match in the file */ if ( q && q[match_len] == 0 && ! (fin->flags & EOF_FLAG)) q = (char *) 0 ; break ; default : bozo("type of rs_shadow") ; } if ( q ) { /* the easy and normal case */ *q = 0 ; *len_p = q - p ; fin->buffp = q + match_len ; return p ; } if ( fin->flags & EOF_FLAG ) { /* last line without a record terminator */ *len_p = r = strlen(p) ; fin->buffp = p+r ; if ( rs_shadow.type == SEP_MLR && fin->buffp[-1] == '\n' && r != 0 ) { (*len_p)-- ; * -- fin->buffp = 0 ; } return p ; } if ( p == fin->buff ) { /* current record is too big for the input buffer, grow buffer */ p = enlarge_fin_buffer(fin) ; } else { /* move a partial line to front of buffer and try again */ unsigned rr ; p = (char *) memcpy( fin->buff, p, SIZE_T(r = strlen(p)) ) ; q = p+r ; rr = fin->nbuffs*BUFFSZ - r ; if ( (r = fillbuff(fin->fd, q, rr)) < rr ) fin->flags |= EOF_FLAG ; } goto retry ; } static char *enlarge_fin_buffer(fin) FIN *fin ; { unsigned r ; unsigned oldsize = fin->nbuffs*BUFFSZ+1 ; #if LM_DOS /* I'm not sure this can really happen: avoid "16bit wrap" */ if ( fin->nbuffs == MAX_BUFFS ) { errmsg(0, "out of input buffer space") ; mawk_exit(1) ; } #endif fin->buffp = fin->buff = (char *) zrealloc(fin->buff, oldsize, oldsize+BUFFSZ); fin->nbuffs++ ; r = fillbuff(fin->fd, fin->buff + (oldsize-1) , BUFFSZ ) ; if ( r < BUFFSZ ) fin->flags |= EOF_FLAG ; return fin->buff ; } /*-------- target is big enough to hold size + 1 chars on exit the back of the target is zero terminated *--------------*/ unsigned fillbuff(fd, target, size) int fd ; register char *target ; unsigned size ; { register int r ; unsigned entry_size = size ; #ifdef THINK_C register char *p = target; #endif while ( size ) switch( r = read(fd, target, size) ) { case -1 : errmsg(errno, "read error") ; exit(1) ; case 0 : goto out ; default : target += r ; size -= r ; break ; } out : *target = 0 ; #ifdef THINK_C /* * I still don't understand why this is needed, unless read() also does * this conversion internally for no good reason. :-( */ for ( ; *p ; ++p ) { if (*p == '\r') *p = '\n'; else if (*p == '\n') *p = '\r'; } #endif return entry_size - size ; } /* main_fin is a handle to the main input stream == 0 never been opened */ FIN *main_fin ; ARRAY Argv ; /* to the user this is ARGV */ static double argi = 1.0 ; /* index of next ARGV[argi] to try to open */ static void set_main_to_stdin() { cell_destroy( FILENAME ) ; FILENAME->type = C_STRING ; FILENAME->ptr = (PTR) new_STRING( "-") ; cell_destroy(FNR) ; FNR->type = C_DOUBLE ; FNR->dval = 0.0 ; main_fin = FINdopen(0, 1) ; } /* this gets called once to get the input stream going. It is called after the execution of the BEGIN block unless there is a getline inside BEGIN {} */ void open_main() { CELL argc ; #if MSDOS int k = binmode() ; if ( k & 1 ) setmode(0, O_BINARY) ; if ( k & 2 ) { setmode(1,O_BINARY) ; setmode(2,O_BINARY) ; } #endif (void) cellcpy(&argc, ARGC) ; if ( argc.type != C_DOUBLE ) cast1_to_d(&argc) ; if ( argc.dval == 1.0 ) set_main_to_stdin() ; else (void) next_main(1) ; } /* get the next command line file open */ static FIN *next_main(open_flag) int open_flag ; /* called by open_main() if on */ { register CELL *cp ; CELL argc ; /* copy of ARGC */ CELL c_argi ; /* cell copy of argi */ CELL argval ; /* copy of ARGV[c_argi] */ argval.type = C_NOINIT ; c_argi.type = C_DOUBLE ; if ( main_fin ) FINclose(main_fin) ; /* FILENAME and FNR don't change unless we really open a new file */ /* make a copy of ARGC to avoid side effect */ if ( cellcpy(&argc, ARGC)->type != C_DOUBLE ) cast1_to_d(&argc) ; while ( argi < argc.dval ) { c_argi.dval = argi ; argi += 1.0 ; if ( !(cp = array_find( Argv, &c_argi, NO_CREATE)) ) continue ; /* its deleted */ /* make a copy so we can cast w/o side effect */ cell_destroy(&argval) ; cp = cellcpy(&argval, cp) ; if ( cp->type < C_STRING ) cast1_to_s(cp) ; if ( string(cp)->len == 0 ) continue ; /* file argument is "" */ /* it might be a command line assignment */ if ( is_cmdline_assign(string(cp)->str) ) continue ; /* try to open it -- we used to continue on failure, but posix says we should quit */ if ( ! (main_fin = FINopen( string(cp)->str, 1 )) ) mawk_exit(1) ; /* success -- set FILENAME and FNR */ cell_destroy(FILENAME) ; (void) cellcpy(FILENAME , cp ) ; free_STRING( string(cp) ) ; cell_destroy(FNR) ; FNR->type = C_DOUBLE ; FNR->dval = 0.0 ; return main_fin ; } /* failure */ cell_destroy(&argval) ; if ( open_flag ) /* all arguments were null or assignment */ { set_main_to_stdin() ; return main_fin ; } /* real failure */ { /* this is how we mark EOF on main_fin */ static char dead_buff = 0 ; static FIN dead_main = {0, (FILE*)0, &dead_buff, &dead_buff, 1, EOF_FLAG} ; return main_fin = &dead_main ; /* since MAIN_FLAG is not set, FINgets won't call next_main() */ } } int is_cmdline_assign(s) char *s ; { register char *p ; int c ; SYMTAB *stp ; CELL *cp ; unsigned len ; CELL cell ; /* used if command line assign to pseudo field */ CELL *fp = (CELL *) 0 ; /* ditto */ if ( scan_code[*(unsigned char *)s] != SC_IDCHAR ) return 0 ; p = s+1 ; while ( (c = scan_code[*(unsigned char*)p]) == SC_IDCHAR || c == SC_DIGIT ) p++ ; if ( *p != '=' ) return 0 ; *p = 0 ; stp = find(s) ; switch( stp->type ) { case ST_NONE : stp->type = ST_VAR ; stp->stval.cp = cp = new_CELL() ; break ; case ST_VAR : case ST_NR : /* !! no one will do this */ cp = stp->stval.cp ; cell_destroy(cp) ; break ; case ST_FIELD : /* must be pseudo field */ fp = stp->stval.cp ; cp = &cell ; break ; default : rt_error( "cannot command line assign to %s\n\ttype clash or keyword" , s ) ; } /* we need to keep ARGV[i] intact */ *p++ = '=' ; len = strlen(p)+1 ; /* posix says escape sequences are on from command line */ p = rm_escape( strcpy((char*)zmalloc(len), p) ) ; cp->ptr = (PTR) new_STRING(p) ; zfree(p,len) ; check_strnum(cp) ; /* sets cp->type */ if ( fp ) /* move it from cell to pfield[] */ { field_assign(fp, cp) ; free_STRING(string(cp)) ; } return 1 ; }