NetBSD/gnu/usr.bin/diff/io.c

/* File I/O for GNU DIFF.
   Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.

This file is part of GNU DIFF.

GNU DIFF is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU DIFF is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU DIFF; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

#ifndef lint
static char *rcsid = "$Id: io.c,v 1.4 1993/09/16 17:39:17 jtc Exp $";
#endif

#include "diff.h"

/* Rotate a value n bits to the left. */
#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))

/* Given a hash value and a new character, return a new hash value. */
#define HASH(h, c) ((c) + ROL (h, 7))

/* Guess remaining number of lines from number N of lines so far,
   size S so far, and total size T.  */
#define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)

/* Type used for fast prefix comparison in find_identical_ends.  */
typedef long word;

/* Lines are put into equivalence classes (of lines that match in line_cmp).
   Each equivalence class is represented by one of these structures,
   but only while the classes are being computed.
   Afterward, each class is represented by a number.  */
struct equivclass
{
  int next;	/* Next item in this bucket. */
  unsigned hash;	/* Hash of lines in this class.  */
  char const *line;	/* A line that fits this class. */
  size_t length;	/* The length of that line.  */
};

/* Hash-table: array of buckets, each being a chain of equivalence classes.  */
static int *buckets;
  
/* Number of buckets in the hash table array. */
static int nbuckets;

/* Array in which the equivalence classes are allocated.
   The bucket-chains go through the elements in this array.
   The number of an equivalence class is its index in this array.  */
static struct equivclass *equivs;

/* Index of first free element in the array `equivs'.  */
static int equivs_index;

/* Number of elements allocated in the array `equivs'.  */
static int equivs_alloc;

static void find_and_hash_each_line PARAMS((struct file_data *));
static void find_identical_ends PARAMS((struct file_data[]));
static void prepare_text_end PARAMS((struct file_data *));

/* Check for binary files and compare them for exact identity.  */

/* Return 1 if BUF contains a non text character.
   SIZE is the number of characters in BUF.  */

#define binary_file_p(buf, size) (size != 0 && memchr (buf, '\0', size) != 0)

/* Get ready to read the current file.
   Return nonzero if SKIP_TEST is zero,
   and if it appears to be a binary file.  */

int
sip (current, skip_test)
     struct file_data *current;
     int skip_test;
{
  /* If we have a nonexistent file at this stage, treat it as empty.  */
  if (current->desc < 0)
    {
      /* Leave room for a sentinel.  */
      current->buffer = xmalloc (sizeof (word));
      current->bufsize = sizeof (word);
      current->buffered_chars = 0;
    }
  else
    {
      current->bufsize = current->buffered_chars
	= STAT_BLOCKSIZE (current->stat);

      if (S_ISREG (current->stat.st_mode))
	/* Get the size out of the stat block.
	   Allocate enough room for appended newline and sentinel.
	   Allocate at least one block, to prevent overrunning the buffer
	   when comparing growing binary files.  */
	current->bufsize = max (current->bufsize,
				current->stat.st_size + sizeof (word) + 1);

      current->buffer = xmalloc (current->bufsize);
      if (skip_test)
	current->buffered_chars = 0;
      else
	{
	  /* Check first part of file to see if it's a binary file.  */
	  current->buffered_chars = read (current->desc,
					  current->buffer,
					  current->buffered_chars);
	  if (current->buffered_chars == -1)
	    pfatal_with_name (current->name);
	  return binary_file_p (current->buffer, current->buffered_chars);
	}
    }
  
  return 0;
}

/* Slurp the rest of the current file completely into memory.  */

void
slurp (current)
     struct file_data *current;
{
  size_t cc;

  if (current->desc < 0)
    /* The file is nonexistent.  */
    ;
  else if (S_ISREG (current->stat.st_mode))
    {
      /* It's a regular file; slurp in the rest all at once.  */
      cc = current->stat.st_size - current->buffered_chars;
      if (cc)
	{
	  cc = read (current->desc,
		     current->buffer + current->buffered_chars,
		     cc);
	  if (cc == -1)
	    pfatal_with_name (current->name);
	  current->buffered_chars += cc;
	}
    }
  /* It's not a regular file; read it, growing the buffer as needed.  */
  else if (always_text_flag || current->buffered_chars != 0)
    {
      for (;;)
	{
	  if (current->buffered_chars == current->bufsize)
	    {
	      current->bufsize = current->bufsize * 2;
	      current->buffer = xrealloc (current->buffer, current->bufsize);
	    }
	  cc = read (current->desc,
		     current->buffer + current->buffered_chars,
		     current->bufsize - current->buffered_chars);
	  if (cc == 0)
	    break;
	  if (cc == -1)
	    pfatal_with_name (current->name);
	  current->buffered_chars += cc;
	}
      /* Allocate just enough room for appended newline and sentinel.  */
      current->bufsize = current->buffered_chars + sizeof (word) + 1;
      current->buffer = xrealloc (current->buffer, current->bufsize);
    }
}

/* Split the file into lines, simultaneously computing the equivalence class for
   each line. */

static void
find_and_hash_each_line (current)
     struct file_data *current;
{
  unsigned h;
  unsigned char const *p = (unsigned char const *) current->prefix_end;
  unsigned char c;
  int i, *bucket;
  size_t length;

  /* Cache often-used quantities in local variables to help the compiler.  */
  char const **linbuf = current->linbuf;
  int alloc_lines = current->alloc_lines;
  int line = 0;
  int linbuf_base = current->linbuf_base;
  int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
  struct equivclass *eqs = equivs;
  int eqs_index = equivs_index;
  int eqs_alloc = equivs_alloc;
  char const *suffix_begin = current->suffix_begin;
  char const *bufend = current->buffer + current->buffered_chars;
  char const *incomplete_tail
    = current->missing_newline && ROBUST_OUTPUT_STYLE (output_style)
      ? bufend : (char const *) 0;
  int varies = length_varies;

  while ((char const *) p < suffix_begin)
    {
      char const *ip = (char const *) p;

      /* Compute the equivalence class for this line.  */

      h = 0;

      /* Hash this line until we find a newline. */
      if (ignore_case_flag)
	{
	  if (ignore_all_space_flag)
	    while ((c = *p++) != '\n')
	      {
		if (! isspace (c))
		  h = HASH (h, isupper (c) ? tolower (c) : c);
	      }
	  else if (ignore_space_change_flag)
	    while ((c = *p++) != '\n')
	      {
		if (isspace (c))
		  {
		    while (isspace (c = *p++))
		      if (c == '\n')
			goto hashing_done;
		    h = HASH (h, ' ');
		  }
		/* C is now the first non-space.  */
		h = HASH (h, isupper (c) ? tolower (c) : c);
	      }
	  else
	    while ((c = *p++) != '\n')
	      h = HASH (h, isupper (c) ? tolower (c) : c);
	}
      else
	{
	  if (ignore_all_space_flag)
	    while ((c = *p++) != '\n')
	      {
		if (! isspace (c))
		  h = HASH (h, c);
	      }
	  else if (ignore_space_change_flag)
	    while ((c = *p++) != '\n')
	      {
		if (isspace (c))
		  {
		    while (isspace (c = *p++))
		      if (c == '\n')
			goto hashing_done;
		    h = HASH (h, ' ');
		  }
		/* C is now the first non-space.  */
		h = HASH (h, c);
	      }
	  else
	    while ((c = *p++) != '\n')
	      h = HASH (h, c);
	}
   hashing_done:;

      bucket = &buckets[h % nbuckets];
      length = (char const *) p - ip - ((char const *) p == incomplete_tail);
      for (i = *bucket;  ;  i = eqs[i].next)
	if (!i)
	  {
	    /* Create a new equivalence class in this bucket. */
	    i = eqs_index++;
	    if (i == eqs_alloc)
	      eqs = (struct equivclass *)
		      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
	    eqs[i].next = *bucket;
	    eqs[i].hash = h;
	    eqs[i].line = ip;
	    eqs[i].length = length;
	    *bucket = i;
	    break;
	  }
	else if (eqs[i].hash == h
		 && (eqs[i].length == length || varies)
		 && ! line_cmp (eqs[i].line, eqs[i].length, ip, length))
	  /* Reuse existing equivalence class.  */
	    break;

      /* Maybe increase the size of the line table. */
      if (line == alloc_lines)
	{
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
	  alloc_lines = 2 * alloc_lines - linbuf_base;
	  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
					     (alloc_lines - linbuf_base)
					     * sizeof (*linbuf))
		   - linbuf_base;
	}
      linbuf[line] = ip;
      cureqs[line] = i;
      ++line;
    }

  current->buffered_lines = line;

  for (i = 0;  ;  i++)
    {
      /* Record the line start for lines in the suffix that we care about.
         Record one more line start than lines,
	 so that we can compute the length of any buffered line.  */
      if (line == alloc_lines)
	{
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
	  alloc_lines = 2 * alloc_lines - linbuf_base;
	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
					     (alloc_lines - linbuf_base)
					     * sizeof (*linbuf))
		   - linbuf_base;
	}
      linbuf[line] = (char const *) p;
    
      if ((char const *) p == bufend)
	{
	  linbuf[line]  -=  (char const *) p == incomplete_tail;
	  break;
	}

      if (context <= i && no_diff_means_no_output)
	break;

      line++;

      while (*p++ != '\n')
	;
    }

  /* Done with cache in local variables.  */
  current->linbuf = linbuf;
  current->valid_lines = line;
  current->alloc_lines = alloc_lines;
  current->equivs = cureqs;
  equivs = eqs;
  equivs_alloc = eqs_alloc;
  equivs_index = eqs_index;
}

/* Prepare the end of the text.  Make sure it's initialized.
   Make sure text ends in a newline,
   but remember that we had to add one unless -B is in effect.  */

static void
prepare_text_end (current)
     struct file_data *current;
{
  size_t buffered_chars = current->buffered_chars;
  char *p = current->buffer;

  if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
    current->missing_newline = 0;
  else
    {
      p[buffered_chars++] = '\n';
      current->buffered_chars = buffered_chars;
      current->missing_newline = ! ignore_blank_lines_flag;
    }
  
  /* Don't use uninitialized storage when planting or using sentinels.  */
  if (p)
    bzero (p + buffered_chars, sizeof (word));
}

/* Given a vector of two file_data objects, find the identical
   prefixes and suffixes of each object. */

static void
find_identical_ends (filevec)
     struct file_data filevec[];
{
  word *w0, *w1;
  char *p0, *p1, *buffer0, *buffer1;
  char const *end0, *beg0;
  char const **linbuf0, **linbuf1;
  int i, lines;
  size_t n0, n1, tem;
  int alloc_lines0, alloc_lines1;
  int buffered_prefix, prefix_count, prefix_mask;

  slurp (&filevec[0]);
  if (filevec[0].desc != filevec[1].desc)
    slurp (&filevec[1]);
  else
    {
      filevec[1].buffer = filevec[0].buffer;
      filevec[1].bufsize = filevec[0].bufsize;
      filevec[1].buffered_chars = filevec[0].buffered_chars;
    }
  for (i = 0; i < 2; i++)
    prepare_text_end (&filevec[i]);

  /* Find identical prefix.  */

  p0 = buffer0 = filevec[0].buffer;
  p1 = buffer1 = filevec[1].buffer;

  n0 = filevec[0].buffered_chars;
  n1 = filevec[1].buffered_chars;

  if (p0 == p1)
    /* The buffers are the same; sentinels won't work.  */
    p0 = p1 += n1;
  else
    {
      /* Insert end sentinels, in this case characters that are guaranteed
	 to make the equality test false, and thus terminate the loop.  */

      if (n0 < n1)
	p0[n0] = ~p1[n0];
      else
	p1[n1] = ~p0[n1];

      /* Loop until first mismatch, or to the sentinel characters.  */

      /* Compare a word at a time for speed.  */
      w0 = (word *) p0;
      w1 = (word *) p1;
      while (*w0++ == *w1++)
	;
      --w0, --w1;

      /* Do the last few bytes of comparison a byte at a time.  */
      p0 = (char *) w0;
      p1 = (char *) w1;
      while (*p0++ == *p1++)
	;
      --p0, --p1;

      /* Don't mistakenly count missing newline as part of prefix. */
      if (ROBUST_OUTPUT_STYLE (output_style)
	  && (buffer0 + n0 - filevec[0].missing_newline < p0)
	     !=
	     (buffer1 + n1 - filevec[1].missing_newline < p1))
	--p0, --p1;
    }

  /* Now P0 and P1 point at the first nonmatching characters.  */

  /* Skip back to last line-beginning in the prefix,
     and then discard up to HORIZON_LINES lines from the prefix.  */
  i = horizon_lines;
  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
    --p0, --p1;

  /* Record the prefix.  */
  filevec[0].prefix_end = p0;
  filevec[1].prefix_end = p1;

  /* Find identical suffix.  */

  /* P0 and P1 point beyond the last chars not yet compared.  */
  p0 = buffer0 + n0;
  p1 = buffer1 + n1;

  if (! ROBUST_OUTPUT_STYLE (output_style)
      || filevec[0].missing_newline == filevec[1].missing_newline)
    {
      end0 = p0;	/* Addr of last char in file 0.  */

      /* Get value of P0 at which we should stop scanning backward:
	 this is when either P0 or P1 points just past the last char
	 of the identical prefix.  */
      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);

      /* Scan back until chars don't match or we reach that point.  */
      while (p0 != beg0)
	if (*--p0 != *--p1)
	  {
	    /* Point at the first char of the matching suffix.  */
	    ++p0, ++p1;
	    beg0 = p0;
	    break;
	  }

      /* Are we at a line-beginning in both files?  If not, add the rest of
	 this line to the main body.  Discard up to HORIZON_LINES lines from
	 the identical suffix.  Also, discard one extra line,
	 because shift_boundaries may need it.  */
      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
			    &&
			    (buffer1 == p1 || p1[-1] == '\n'));
      while (i-- && p0 != end0)
	while (*p0++ != '\n')
	  ;

      p1 += p0 - beg0;
    }

  /* Record the suffix.  */
  filevec[0].suffix_begin = p0;
  filevec[1].suffix_begin = p1;

  /* Calculate number of lines of prefix to save.

     prefix_count == 0 means save the whole prefix;
     we need this with for options like -D that output the whole file.
     We also need it for options like -F that output some preceding line;
     at least we will need to find the last few lines,
     but since we don't know how many, it's easiest to find them all.

     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
     of the line buffer; they'll be moved to the proper location later.
     Handle 1 more line than the context says (because we count 1 too many),
     rounded up to the next power of 2 to speed index computation.  */

  if (no_diff_means_no_output && ! function_regexp_list)
    {
      for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
	;
      prefix_mask = prefix_count - 1;
      alloc_lines0
	= prefix_count
	  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
	  + context;
    }
  else
    {
      prefix_count = 0;
      prefix_mask = ~0;
      alloc_lines0 = GUESS_LINES (0, 0, n0);
    }

  lines = 0;
  linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));

  /* If the prefix is needed, find the prefix lines.  */
  if (! (no_diff_means_no_output
	 && filevec[0].prefix_end == p0
	 && filevec[1].prefix_end == p1))
    {
      p0 = buffer0;
      end0 = filevec[0].prefix_end;
      while (p0 != end0)
	{
	  int l = lines++ & prefix_mask;
	  if (l == alloc_lines0)
	    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
							 * sizeof(*linbuf0));
	  linbuf0[l] = p0;
	  while (*p0++ != '\n')
	    ;
	}
    }
  buffered_prefix = prefix_count && context < lines ? context : lines;

  /* Allocate line buffer 1.  */
  tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;

  alloc_lines1
    = (buffered_prefix
       + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
       + context);
  linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));

  if (buffered_prefix != lines)
    {
      /* Rotate prefix lines to proper location.  */
      for (i = 0;  i < buffered_prefix;  i++)
	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
      for (i = 0;  i < buffered_prefix;  i++)
	linbuf0[i] = linbuf1[i];
    }

  /* Initialize line buffer 1 from line buffer 0.  */
  for (i = 0; i < buffered_prefix; i++)
    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;

  /* Record the line buffer, adjusted so that
     linbuf*[0] points at the first differing line.  */
  filevec[0].linbuf = linbuf0 + buffered_prefix;
  filevec[1].linbuf = linbuf1 + buffered_prefix;
  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
}

/* Largest primes less than some power of two, for nbuckets.  Values range
   from useful to preposterous.  If one of these numbers isn't prime
   after all, don't blame it on me, blame it on primes (6) . . . */
static int const primes[] =
{
  509,
  1021,
  2039,
  4093,
  8191,
  16381,
  32749,
  65521,
  131071,
  262139,
  524287,
  1048573,
  2097143,
  4194301,
  8388593,
  16777213,
  33554393,
  67108859,			/* Preposterously large . . . */
  134217689,
  268435399,
  536870909,
  1073741789,
  2147483647,
  0
};

/* Given a vector of two file_data objects, read the file associated
   with each one, and build the table of equivalence classes.
   Return 1 if either file appears to be a binary file.
   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */

int
read_files (filevec, pretend_binary)
     struct file_data filevec[];
     int pretend_binary;
{
  int i;
  int skip_test = always_text_flag | pretend_binary;
  int appears_binary = pretend_binary | sip (&filevec[0], skip_test);

  if (filevec[0].desc != filevec[1].desc)
    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
  else
    {
      filevec[1].buffer = filevec[0].buffer;
      filevec[1].bufsize = filevec[0].bufsize;
      filevec[1].buffered_chars = filevec[0].buffered_chars;
    }
  if (appears_binary)
    return 1;

  find_identical_ends (filevec);

  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
  equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
  /* Equivalence class 0 is permanently safe for lines that were not
     hashed.  Real equivalence classes start at 1. */
  equivs_index = 1;

  for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
    if (! primes[i])
      abort ();
  nbuckets = primes[i];

  buckets = (int *) xmalloc (nbuckets * sizeof (*buckets));
  bzero (buckets, nbuckets * sizeof (*buckets));

  for (i = 0; i < 2; ++i)
    find_and_hash_each_line (&filevec[i]);

  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;

  free (equivs);
  free (buckets);

  return 0;
}
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								/* File I/O for GNU DIFF.
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								   Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								This file is part of GNU DIFF.
 								GNU DIFF is free software; you can redistribute it and/or modify
 								it under the terms of the GNU General Public License as published by
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								the Free Software Foundation; either version 2, or (at your option)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								any later version.
 								GNU DIFF is distributed in the hope that it will be useful,
 								but WITHOUT ANY WARRANTY; without even the implied warranty of
 								MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								GNU General Public License for more details.
 								You should have received a copy of the GNU General Public License
 								along with GNU DIFF; see the file COPYING.  If not, write to
 								the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
-												Add RCS identifiers, remove some completely useless RCS logs and patchkit
headers, and a few other insignificant changes.
											
										
										
											1993-08-02 21:18:41 +04:00
+								#ifndef lint
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								static char *rcsid = "$Id: io.c,v 1.4 1993/09/16 17:39:17 jtc Exp $";
 								#endif
-												Add RCS identifiers, remove some completely useless RCS logs and patchkit
headers, and a few other insignificant changes.
											
										
										
											1993-08-02 21:18:41 +04:00
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								#include "diff.h"
 								/* Rotate a value n bits to the left. */
 								#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								/* Given a hash value and a new character, return a new hash value. */
 								#define HASH(h, c) ((c) + ROL (h, 7))
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								/* Guess remaining number of lines from number N of lines so far,
 								   size S so far, and total size T.  */
 								#define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
 								/* Type used for fast prefix comparison in find_identical_ends.  */
 								typedef long word;
 								/* Lines are put into equivalence classes (of lines that match in line_cmp).
 								   Each equivalence class is represented by one of these structures,
 								   but only while the classes are being computed.
 								   Afterward, each class is represented by a number.  */
 								struct equivclass
 								{
 								  int next;	/* Next item in this bucket. */
 								  unsigned hash;	/* Hash of lines in this class.  */
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  char const *line;	/* A line that fits this class. */
 								  size_t length;	/* The length of that line.  */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								};
 								/* Hash-table: array of buckets, each being a chain of equivalence classes.  */
 								static int *buckets;
 								/* Number of buckets in the hash table array. */
 								static int nbuckets;
 								/* Array in which the equivalence classes are allocated.
 								   The bucket-chains go through the elements in this array.
 								   The number of an equivalence class is its index in this array.  */
 								static struct equivclass *equivs;
 								/* Index of first free element in the array `equivs'.  */
 								static int equivs_index;
 								/* Number of elements allocated in the array `equivs'.  */
 								static int equivs_alloc;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
 								static void find_and_hash_each_line PARAMS((struct file_data *));
 								static void find_identical_ends PARAMS((struct file_data[]));
 								static void prepare_text_end PARAMS((struct file_data *));
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								/* Check for binary files and compare them for exact identity.  */
 								/* Return 1 if BUF contains a non text character.
 								   SIZE is the number of characters in BUF.  */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								#define binary_file_p(buf, size) (size != 0 && memchr (buf, '\0', size) != 0)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								/* Get ready to read the current file.
 								   Return nonzero if SKIP_TEST is zero,
 								   and if it appears to be a binary file.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								int
 								sip (current, skip_test)
 								     struct file_data *current;
 								     int skip_test;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								{
 								  /* If we have a nonexistent file at this stage, treat it as empty.  */
 								  if (current->desc < 0)
 								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      /* Leave room for a sentinel.  */
 								      current->buffer = xmalloc (sizeof (word));
 								      current->bufsize = sizeof (word);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								      current->buffered_chars = 0;
 								    }
 								  else
 								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      current->bufsize = current->buffered_chars
 									= STAT_BLOCKSIZE (current->stat);
 								      if (S_ISREG (current->stat.st_mode))
 									/* Get the size out of the stat block.
 									   Allocate enough room for appended newline and sentinel.
 									   Allocate at least one block, to prevent overrunning the buffer
 									   when comparing growing binary files.  */
 									current->bufsize = max (current->bufsize,
 												current->stat.st_size + sizeof (word) + 1);
 								      current->buffer = xmalloc (current->bufsize);
 								      if (skip_test)
 									current->buffered_chars = 0;
 								      else
 									{
 									  /* Check first part of file to see if it's a binary file.  */
 									  current->buffered_chars = read (current->desc,
 													  current->buffer,
 													  current->buffered_chars);
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  if (current->buffered_chars == -1)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									    pfatal_with_name (current->name);
 									  return binary_file_p (current->buffer, current->buffered_chars);
 									}
 								    }
 								  return 0;
 								}
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								/* Slurp the rest of the current file completely into memory.  */
 								void
 								slurp (current)
 								     struct file_data *current;
 								{
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  size_t cc;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								  if (current->desc < 0)
 								    /* The file is nonexistent.  */
 								    ;
 								  else if (S_ISREG (current->stat.st_mode))
 								    {
 								      /* It's a regular file; slurp in the rest all at once.  */
 								      cc = current->stat.st_size - current->buffered_chars;
 								      if (cc)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									{
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									  cc = read (current->desc,
 										     current->buffer + current->buffered_chars,
 										     cc);
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  if (cc == -1)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									    pfatal_with_name (current->name);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									  current->buffered_chars += cc;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									}
 								    }
 								  /* It's not a regular file; read it, growing the buffer as needed.  */
 								  else if (always_text_flag || current->buffered_chars != 0)
 								    {
 								      for (;;)
 									{
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									  if (current->buffered_chars == current->bufsize)
 									    {
 									      current->bufsize = current->bufsize * 2;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									      current->buffer = xrealloc (current->buffer, current->bufsize);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									    }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									  cc = read (current->desc,
 										     current->buffer + current->buffered_chars,
 										     current->bufsize - current->buffered_chars);
 									  if (cc == 0)
 									    break;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  if (cc == -1)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									    pfatal_with_name (current->name);
 									  current->buffered_chars += cc;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									}
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      /* Allocate just enough room for appended newline and sentinel.  */
 								      current->bufsize = current->buffered_chars + sizeof (word) + 1;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      current->buffer = xrealloc (current->buffer, current->bufsize);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
 								}
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								/* Split the file into lines, simultaneously computing the equivalence class for
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								   each line. */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								static void
 								find_and_hash_each_line (current)
 								     struct file_data *current;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								{
 								  unsigned h;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  unsigned char const *p = (unsigned char const *) current->prefix_end;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  unsigned char c;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  int i, *bucket;
 								  size_t length;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								  /* Cache often-used quantities in local variables to help the compiler.  */
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  char const **linbuf = current->linbuf;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  int alloc_lines = current->alloc_lines;
 								  int line = 0;
 								  int linbuf_base = current->linbuf_base;
 								  int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
 								  struct equivclass *eqs = equivs;
 								  int eqs_index = equivs_index;
 								  int eqs_alloc = equivs_alloc;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  char const *suffix_begin = current->suffix_begin;
 								  char const *bufend = current->buffer + current->buffered_chars;
 								  char const *incomplete_tail
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								    = current->missing_newline && ROBUST_OUTPUT_STYLE (output_style)
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      ? bufend : (char const *) 0;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  int varies = length_varies;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  while ((char const *) p < suffix_begin)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      char const *ip = (char const *) p;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								      /* Compute the equivalence class for this line.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								      h = 0;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      /* Hash this line until we find a newline. */
 								      if (ignore_case_flag)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									{
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									  if (ignore_all_space_flag)
 									    while ((c = *p++) != '\n')
 									      {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										if (! isspace (c))
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										  h = HASH (h, isupper (c) ? tolower (c) : c);
 									      }
 									  else if (ignore_space_change_flag)
 									    while ((c = *p++) != '\n')
 									      {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										if (isspace (c))
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+										  {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										    while (isspace (c = *p++))
 										      if (c == '\n')
 											goto hashing_done;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										    h = HASH (h, ' ');
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+										  }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										/* C is now the first non-space.  */
 										h = HASH (h, isupper (c) ? tolower (c) : c);
 									      }
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									  else
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									    while ((c = *p++) != '\n')
 									      h = HASH (h, isupper (c) ? tolower (c) : c);
 									}
 								      else
 									{
 									  if (ignore_all_space_flag)
 									    while ((c = *p++) != '\n')
 									      {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										if (! isspace (c))
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										  h = HASH (h, c);
 									      }
 									  else if (ignore_space_change_flag)
 									    while ((c = *p++) != '\n')
 									      {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										if (isspace (c))
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+										  {
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+										    while (isspace (c = *p++))
 										      if (c == '\n')
 											goto hashing_done;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										    h = HASH (h, ' ');
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+										  }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+										/* C is now the first non-space.  */
 										h = HASH (h, c);
 									      }
 									  else
 									    while ((c = *p++) != '\n')
 									      h = HASH (h, c);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									}
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								   hashing_done:;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								      bucket = &buckets[h % nbuckets];
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      length = (char const *) p - ip - ((char const *) p == incomplete_tail);
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      for (i = *bucket;  ;  i = eqs[i].next)
 									if (!i)
 									  {
 									    /* Create a new equivalence class in this bucket. */
 									    i = eqs_index++;
 									    if (i == eqs_alloc)
 									      eqs = (struct equivclass *)
 										      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
 									    eqs[i].next = *bucket;
 									    eqs[i].hash = h;
 									    eqs[i].line = ip;
 									    eqs[i].length = length;
 									    *bucket = i;
 									    break;
 									  }
 									else if (eqs[i].hash == h
 										 && (eqs[i].length == length || varies)
 										 && ! line_cmp (eqs[i].line, eqs[i].length, ip, length))
 									  /* Reuse existing equivalence class.  */
 									    break;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								      /* Maybe increase the size of the line table. */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      if (line == alloc_lines)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									{
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
 									  alloc_lines = 2 * alloc_lines - linbuf_base;
 									  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+													     (alloc_lines - linbuf_base)
 													     * sizeof (*linbuf))
 										   - linbuf_base;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									}
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      linbuf[line] = ip;
 								      cureqs[line] = i;
 								      ++line;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  current->buffered_lines = line;
 								  for (i = 0;  ;  i++)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      /* Record the line start for lines in the suffix that we care about.
 								         Record one more line start than lines,
 									 so that we can compute the length of any buffered line.  */
 								      if (line == alloc_lines)
 									{
 									  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
 									  alloc_lines = 2 * alloc_lines - linbuf_base;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+													     (alloc_lines - linbuf_base)
 													     * sizeof (*linbuf))
 										   - linbuf_base;
 									}
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      linbuf[line] = (char const *) p;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								      if ((char const *) p == bufend)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									{
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									  linbuf[line]  -=  (char const *) p == incomplete_tail;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									  break;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+									}
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								      if (context <= i && no_diff_means_no_output)
 									break;
 								      line++;
 								      while (*p++ != '\n')
 									;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  /* Done with cache in local variables.  */
 								  current->linbuf = linbuf;
 								  current->valid_lines = line;
 								  current->alloc_lines = alloc_lines;
 								  current->equivs = cureqs;
 								  equivs = eqs;
 								  equivs_alloc = eqs_alloc;
 								  equivs_index = eqs_index;
 								}
 								/* Prepare the end of the text.  Make sure it's initialized.
 								   Make sure text ends in a newline,
 								   but remember that we had to add one unless -B is in effect.  */
 								static void
 								prepare_text_end (current)
 								     struct file_data *current;
 								{
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  size_t buffered_chars = current->buffered_chars;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  char *p = current->buffer;
 								  if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
 								    current->missing_newline = 0;
 								  else
 								    {
 								      p[buffered_chars++] = '\n';
 								      current->buffered_chars = buffered_chars;
 								      current->missing_newline = ! ignore_blank_lines_flag;
 								    }
 								  /* Don't use uninitialized storage when planting or using sentinels.  */
 								  if (p)
 								    bzero (p + buffered_chars, sizeof (word));
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								}
 								/* Given a vector of two file_data objects, find the identical
 								   prefixes and suffixes of each object. */
 								static void
 								find_identical_ends (filevec)
 								     struct file_data filevec[];
 								{
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  word *w0, *w1;
 								  char *p0, *p1, *buffer0, *buffer1;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  char const *end0, *beg0;
 								  char const **linbuf0, **linbuf1;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  int i, lines;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  size_t n0, n1, tem;
 								  int alloc_lines0, alloc_lines1;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  int buffered_prefix, prefix_count, prefix_mask;
 								  slurp (&filevec[0]);
 								  if (filevec[0].desc != filevec[1].desc)
 								    slurp (&filevec[1]);
 								  else
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      filevec[1].buffer = filevec[0].buffer;
 								      filevec[1].bufsize = filevec[0].bufsize;
 								      filevec[1].buffered_chars = filevec[0].buffered_chars;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  for (i = 0; i < 2; i++)
 								    prepare_text_end (&filevec[i]);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								  /* Find identical prefix.  */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  p0 = buffer0 = filevec[0].buffer;
 								  p1 = buffer1 = filevec[1].buffer;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  n0 = filevec[0].buffered_chars;
 								  n1 = filevec[1].buffered_chars;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  if (p0 == p1)
 								    /* The buffers are the same; sentinels won't work.  */
 								    p0 = p1 += n1;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								  else
 								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      /* Insert end sentinels, in this case characters that are guaranteed
 									 to make the equality test false, and thus terminate the loop.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      if (n0 < n1)
 									p0[n0] = ~p1[n0];
 								      else
 									p1[n1] = ~p0[n1];
 								      /* Loop until first mismatch, or to the sentinel characters.  */
 								      /* Compare a word at a time for speed.  */
 								      w0 = (word *) p0;
 								      w1 = (word *) p1;
 								      while (*w0++ == *w1++)
 									;
 								      --w0, --w1;
 								      /* Do the last few bytes of comparison a byte at a time.  */
 								      p0 = (char *) w0;
 								      p1 = (char *) w1;
 								      while (*p0++ == *p1++)
 									;
 								      --p0, --p1;
 								      /* Don't mistakenly count missing newline as part of prefix. */
 								      if (ROBUST_OUTPUT_STYLE (output_style)
 									  && (buffer0 + n0 - filevec[0].missing_newline < p0)
 									     !=
 									     (buffer1 + n1 - filevec[1].missing_newline < p1))
 									--p0, --p1;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  /* Now P0 and P1 point at the first nonmatching characters.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  /* Skip back to last line-beginning in the prefix,
 								     and then discard up to HORIZON_LINES lines from the prefix.  */
 								  i = horizon_lines;
 								  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    --p0, --p1;
 								  /* Record the prefix.  */
 								  filevec[0].prefix_end = p0;
 								  filevec[1].prefix_end = p1;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								  /* Find identical suffix.  */
 								  /* P0 and P1 point beyond the last chars not yet compared.  */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  p0 = buffer0 + n0;
 								  p1 = buffer1 + n1;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								  if (! ROBUST_OUTPUT_STYLE (output_style)
 								      || filevec[0].missing_newline == filevec[1].missing_newline)
 								    {
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      end0 = p0;	/* Addr of last char in file 0.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								      /* Get value of P0 at which we should stop scanning backward:
 									 this is when either P0 or P1 points just past the last char
 									 of the identical prefix.  */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								      /* Scan back until chars don't match or we reach that point.  */
 								      while (p0 != beg0)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+									if (*--p0 != *--p1)
 									  {
 									    /* Point at the first char of the matching suffix.  */
 									    ++p0, ++p1;
 									    beg0 = p0;
 									    break;
 									  }
 								      /* Are we at a line-beginning in both files?  If not, add the rest of
 									 this line to the main body.  Discard up to HORIZON_LINES lines from
 									 the identical suffix.  Also, discard one extra line,
 									 because shift_boundaries may need it.  */
 								      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
 											    &&
 											    (buffer1 == p1 || p1[-1] == '\n'));
 								      while (i-- && p0 != end0)
 									while (*p0++ != '\n')
 									  ;
 								      p1 += p0 - beg0;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    }
 								  /* Record the suffix.  */
 								  filevec[0].suffix_begin = p0;
 								  filevec[1].suffix_begin = p1;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  /* Calculate number of lines of prefix to save.
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								     prefix_count == 0 means save the whole prefix;
 								     we need this with for options like -D that output the whole file.
 								     We also need it for options like -F that output some preceding line;
 								     at least we will need to find the last few lines,
 								     but since we don't know how many, it's easiest to find them all.
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
 								     of the line buffer; they'll be moved to the proper location later.
 								     Handle 1 more line than the context says (because we count 1 too many),
 								     rounded up to the next power of 2 to speed index computation.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  if (no_diff_means_no_output && ! function_regexp_list)
 								    {
 								      for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
 									;
 								      prefix_mask = prefix_count - 1;
 								      alloc_lines0
 									= prefix_count
 									  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
 									  + context;
 								    }
 								  else
 								    {
 								      prefix_count = 0;
 								      prefix_mask = ~0;
 								      alloc_lines0 = GUESS_LINES (0, 0, n0);
 								    }
 								  lines = 0;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								  /* If the prefix is needed, find the prefix lines.  */
 								  if (! (no_diff_means_no_output
 									 && filevec[0].prefix_end == p0
 									 && filevec[1].prefix_end == p1))
 								    {
 								      p0 = buffer0;
 								      end0 = filevec[0].prefix_end;
 								      while (p0 != end0)
 									{
 									  int l = lines++ & prefix_mask;
 									  if (l == alloc_lines0)
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+									    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+															 * sizeof(*linbuf0));
 									  linbuf0[l] = p0;
 									  while (*p0++ != '\n')
 									    ;
 									}
 								    }
 								  buffered_prefix = prefix_count && context < lines ? context : lines;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  /* Allocate line buffer 1.  */
 								  tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
 								  alloc_lines1
 								    = (buffered_prefix
 								       + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
 								       + context);
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
 								  if (buffered_prefix != lines)
 								    {
 								      /* Rotate prefix lines to proper location.  */
 								      for (i = 0;  i < buffered_prefix;  i++)
 									linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
 								      for (i = 0;  i < buffered_prefix;  i++)
 									linbuf0[i] = linbuf1[i];
 								    }
 								  /* Initialize line buffer 1 from line buffer 0.  */
 								  for (i = 0; i < buffered_prefix; i++)
 								    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
 								  /* Record the line buffer, adjusted so that
 								     linbuf*[0] points at the first differing line.  */
 								  filevec[0].linbuf = linbuf0 + buffered_prefix;
 								  filevec[1].linbuf = linbuf1 + buffered_prefix;
 								  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
 								  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
 								  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
 								  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
 								}
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								/* Largest primes less than some power of two, for nbuckets.  Values range
 								   from useful to preposterous.  If one of these numbers isn't prime
 								   after all, don't blame it on me, blame it on primes (6) . . . */
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								static int const primes[] =
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								{
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 								  1048573,
 								  2097143,
 								  4194301,
 								  8388593,
 								  16777213,
 								  33554393,
 								  67108859,			/* Preposterously large . . . */
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  134217689,
 								  268435399,
 								  536870909,
 								  1073741789,
 								  2147483647,
 
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								};
 								/* Given a vector of two file_data objects, read the file associated
 								   with each one, and build the table of equivalence classes.
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								   Return 1 if either file appears to be a binary file.
 								   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								int
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								read_files (filevec, pretend_binary)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								     struct file_data filevec[];
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								     int pretend_binary;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								{
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  int i;
-												Updated to GNU Diffutils 2.4.
											
										
										
											1993-09-16 21:39:00 +04:00
+								  int skip_test = always_text_flag | pretend_binary;
 								  int appears_binary = pretend_binary | sip (&filevec[0], skip_test);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  if (filevec[0].desc != filevec[1].desc)
 								    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
 								  else
 								    {
 								      filevec[1].buffer = filevec[0].buffer;
 								      filevec[1].bufsize = filevec[0].bufsize;
 								      filevec[1].buffered_chars = filevec[0].buffered_chars;
 								    }
 								  if (appears_binary)
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								    return 1;
 								  find_identical_ends (filevec);
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
+								  equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
 								  /* Equivalence class 0 is permanently safe for lines that were not
 								     hashed.  Real equivalence classes start at 1. */
 								  equivs_index = 1;
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								  for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
 								    if (! primes[i])
 								      abort ();
 								  nbuckets = primes[i];
 								  buckets = (int *) xmalloc (nbuckets * sizeof (*buckets));
 								  bzero (buckets, nbuckets * sizeof (*buckets));
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								  for (i = 0; i < 2; ++i)
-												Update to GNU diff 2.3
											
										
										
											1993-07-09 00:13:23 +04:00
+								    find_and_hash_each_line (&filevec[i]);
-												initial import of 386bsd-0.1 sources
											
										
										
											1993-03-21 12:45:37 +03:00
 								  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
 								  free (equivs);
 								  free (buckets);
 								  return 0;
 								}