Ticket #1652: autodetect line-endings

* On opening file, detect line-endings used by sampling some initial content.
    * If they happen to be CR or CRLF, skip the fast load path, and in
      edit_insert_file() convert such line endings to '\n'.
    * Save detected line ending type for editor.

Signed-off-by: Slava Zanko <slavazanko@gmail.com>
This commit is contained in:
Paul Sokolovsky 2011-10-20 16:40:02 +03:00 committed by Ilia Maslakov
parent 849fe8e4b1
commit d1c1d6e2fb
4 changed files with 83 additions and 23 deletions

View File

@ -258,7 +258,7 @@ int edit_insert_column_of_text_from_file (WEdit * edit, int file,
long *start_pos, long *end_pos, int *col1, int *col2); long *start_pos, long *end_pos, int *col1, int *col2);
char *edit_get_word_from_pos (WEdit * edit, long start_pos, long *start, gsize * len, gsize * cut); char *edit_get_word_from_pos (WEdit * edit, long start_pos, long *start, gsize * len, gsize * cut);
long edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath); long edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type);
gboolean edit_load_back_cmd (WEdit * edit); gboolean edit_load_back_cmd (WEdit * edit);
gboolean edit_load_forward_cmd (WEdit * edit); gboolean edit_load_forward_cmd (WEdit * edit);
void edit_block_process_cmd (WEdit * edit, int macro_number); void edit_block_process_cmd (WEdit * edit, int macro_number);

View File

@ -114,6 +114,8 @@ const char VERTICAL_MAGIC[] = { '\1', '\1', '\1', '\1', '\n' };
#define space_width 1 #define space_width 1
#define DETECT_LB_TYPE_BUFLEN BUF_MEDIUM
/*** file scope type declarations ****************************************************************/ /*** file scope type declarations ****************************************************************/
/*** file scope variables ************************************************************************/ /*** file scope variables ************************************************************************/
@ -415,6 +417,37 @@ check_file_access (WEdit * edit, const vfs_path_t * filename_vpath, struct stat
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */
/**
 * Detect the type of line breaks used in a file by sampling its beginning.
 *
 * At most sizeof (buf) - 1 bytes are read from the start of the file. The
 * sample is examined as a C string, so the scan stops at the first NUL byte.
 *
 * @param filename_vpath path of the file to examine
 *
 * @return LB_WIN if the sample contains a CR LF pair (including a pair that
 *         straddles the end of the sample), LB_MAC if it contains a CR but no
 *         CR LF pair, LB_ASIS otherwise: no CR found, the file is empty, or it
 *         cannot be opened or read. LB_UNIX is never returned.
 */
/* --------------------------------------------------------------------------------------------- */
static LineBreaks
detect_lb_type (const vfs_path_t *filename_vpath)
{
    char buf[BUF_MEDIUM];
    int file;
    ssize_t sz;
    int crlf_at_edge = 0;

    file = mc_open (filename_vpath, O_RDONLY | O_BINARY);
    if (file == -1)
        return LB_ASIS;

    /* keep one byte spare for the terminating NUL */
    sz = mc_read (file, buf, sizeof (buf) - 1);

    /*
     * A CR in the last sampled byte is ambiguous: it is either a lone CR or
     * the first half of a CR LF pair cut by the sample boundary. Peek at the
     * following byte to tell them apart; otherwise a CRLF file with a long
     * first line would be taken for a CR-only file.
     * NOTE(review): assumes mc_read() continues from the current file offset
     * like read(2) -- confirm.
     */
    if (sz > 0 && buf[sz - 1] == '\r')
    {
        char next;

        if (mc_read (file, &next, 1) == 1 && next == '\n')
            crlf_at_edge = 1;
    }

    mc_close (file);

    if (sz <= 0)
        return LB_ASIS;

    buf[(size_t) sz] = '\0';

    /* CR LF is checked first: a CRLF file also contains CR characters */
    if (crlf_at_edge != 0 || strstr (buf, "\r\n") != NULL)
        return LB_WIN;
    if (strchr (buf, '\r') != NULL)
        return LB_MAC;
    return LB_ASIS;
}
/* --------------------------------------------------------------------------------------------- */
/** /**
* Open the file and load it into the buffers, either directly or using * Open the file and load it into the buffers, either directly or using
* a filter. Return TRUE on success, FALSE on error. * a filter. Return TRUE on success, FALSE on error.
@ -431,6 +464,7 @@ static gboolean
edit_load_file (WEdit * edit) edit_load_file (WEdit * edit)
{ {
gboolean fast_load = TRUE; gboolean fast_load = TRUE;
LineBreaks lb_type = LB_ASIS;
/* Cannot do fast load if a filter is used */ /* Cannot do fast load if a filter is used */
if (edit_find_filter (edit->filename_vpath) >= 0) if (edit_find_filter (edit->filename_vpath) >= 0)
@ -455,6 +489,10 @@ edit_load_file (WEdit * edit)
edit_clean (edit); edit_clean (edit);
return FALSE; return FALSE;
} }
lb_type = detect_lb_type (edit->filename_vpath);
if (lb_type != LB_ASIS && lb_type != LB_UNIX)
fast_load = FALSE;
} }
else else
{ {
@ -478,15 +516,16 @@ edit_load_file (WEdit * edit)
&& *(vfs_path_get_by_index (edit->filename_vpath, 0)->path) != '\0') && *(vfs_path_get_by_index (edit->filename_vpath, 0)->path) != '\0')
{ {
edit->undo_stack_disable = 1; edit->undo_stack_disable = 1;
if (edit_insert_file (edit, edit->filename_vpath) < 0) if (edit_insert_file (edit, edit->filename_vpath, lb_type) < 0)
{ {
edit_clean (edit); edit_clean (edit);
return FALSE; return FALSE;
} }
edit_set_markers (edit, 0, 0, 0, 0);
edit->undo_stack_disable = 0; edit->undo_stack_disable = 0;
} }
} }
edit->lb = LB_ASIS; edit->lb = lb_type;
return TRUE; return TRUE;
} }
@ -1774,7 +1813,7 @@ user_menu (WEdit * edit, const char *menu_file, int selected_entry)
{ {
long ins_len; long ins_len;
ins_len = edit_insert_file (edit, block_file_vpath); ins_len = edit_insert_file (edit, block_file_vpath, LB_ASIS);
if (nomark == 0 && ins_len > 0) if (nomark == 0 && ins_len > 0)
edit_set_markers (edit, start_mark, start_mark + ins_len, 0, 0); edit_set_markers (edit, start_mark, start_mark + ins_len, 0, 0);
} }
@ -2040,7 +2079,7 @@ edit_get_word_from_pos (WEdit * edit, long start_pos, long *start, gsize * len,
/** inserts a file at the cursor, returns count of inserted bytes on success */ /** inserts a file at the cursor, returns count of inserted bytes on success */
long long
edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath) edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type)
{ {
char *p; char *p;
long ins_len = 0; long ins_len = 0;
@ -2120,7 +2159,19 @@ edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath)
while ((blocklen = mc_read (file, (char *) buf, TEMP_BUF_LEN)) > 0) while ((blocklen = mc_read (file, (char *) buf, TEMP_BUF_LEN)) > 0)
{ {
for (i = 0; i < blocklen; i++) for (i = 0; i < blocklen; i++)
edit_insert (edit, buf[i]); {
if (buf[i] == '\r')
{
if (lb_type == LB_MAC)
edit_insert (edit, '\n');
else if (lb_type == LB_WIN)
/* just skip */ ;
else
edit_insert (edit, '\r');
}
else
edit_insert (edit, buf[i]);
}
} }
/* highlight inserted text then not persistent blocks */ /* highlight inserted text then not persistent blocks */
if (!option_persistent_selections && edit->modified) if (!option_persistent_selections && edit->modified)
@ -2225,6 +2276,7 @@ edit_init (WEdit * edit, int y, int x, int lines, int cols, const vfs_path_t * f
edit->redo_stack_size_mask = START_STACK_SIZE - 1; edit->redo_stack_size_mask = START_STACK_SIZE - 1;
edit->redo_stack = g_malloc0 ((edit->redo_stack_size + 10) * sizeof (long)); edit->redo_stack = g_malloc0 ((edit->redo_stack_size + 10) * sizeof (long));
edit->highlight = 0;
edit->utf8 = 0; edit->utf8 = 0;
edit->converter = str_cnv_from_term; edit->converter = str_cnv_from_term;
edit_set_codeset (edit); edit_set_codeset (edit);

View File

@ -449,7 +449,9 @@ edit_get_save_file_as (WEdit * edit)
char *fname; char *fname;
vfs_path_t *ret_vpath; vfs_path_t *ret_vpath;
edit->lb = cur_lb; /* Don't change current LB type (possibly autodetected), unless user asked to. */
if (cur_lb != LB_ASIS)
edit->lb = cur_lb;
fname = tilde_expand (filename_res); fname = tilde_expand (filename_res);
g_free (filename_res); g_free (filename_res);
ret_vpath = vfs_path_from_str (fname); ret_vpath = vfs_path_from_str (fname);
@ -3021,7 +3023,7 @@ edit_paste_from_X_buf_cmd (WEdit * edit)
/* try use external clipboard utility */ /* try use external clipboard utility */
mc_event_raise (MCEVENT_GROUP_CORE, "clipboard_file_from_ext_clip", NULL); mc_event_raise (MCEVENT_GROUP_CORE, "clipboard_file_from_ext_clip", NULL);
tmp = mc_config_get_full_vpath (EDIT_CLIP_FILE); tmp = mc_config_get_full_vpath (EDIT_CLIP_FILE);
edit_insert_file (edit, tmp); edit_insert_file (edit, tmp, LB_ASIS);
vfs_path_free (tmp); vfs_path_free (tmp);
} }
@ -3136,7 +3138,7 @@ edit_insert_file_cmd (WEdit * edit)
vfs_path_t *exp_vpath; vfs_path_t *exp_vpath;
exp_vpath = vfs_path_from_str (exp); exp_vpath = vfs_path_from_str (exp);
ret = (edit_insert_file (edit, exp_vpath) >= 0); ret = (edit_insert_file (edit, exp_vpath, LB_ASIS) >= 0);
vfs_path_free (exp_vpath); vfs_path_free (exp_vpath);
if (!ret) if (!ret)
@ -3209,12 +3211,12 @@ edit_sort_cmd (WEdit * edit)
if (edit_block_delete_cmd (edit)) if (edit_block_delete_cmd (edit))
return 1; return 1;
else
{ {
vfs_path_t *tmp_vpath; vfs_path_t *tmp_vpath;
tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE); tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE);
edit_insert_file (edit, tmp_vpath); edit_insert_file (edit, tmp_vpath, LB_ASIS);
vfs_path_free (tmp_vpath); vfs_path_free (tmp_vpath);
} }
return 0; return 0;
@ -3251,16 +3253,15 @@ edit_ext_cmd (WEdit * edit)
edit_error_dialog (_("External command"), get_sys_error (_("Cannot execute command"))); edit_error_dialog (_("External command"), get_sys_error (_("Cannot execute command")));
return -1; return -1;
} }
else
edit->force |= REDRAW_COMPLETELY;
{ {
vfs_path_t *tmp_vpath; vfs_path_t *tmp_vpath;
tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE); tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE);
edit_insert_file (edit, tmp_vpath); edit_insert_file (edit, tmp_vpath, LB_ASIS);
vfs_path_free (tmp_vpath); vfs_path_free (tmp_vpath);
} }
edit->force |= REDRAW_COMPLETELY;
return 0; return 0;
} }

View File

@ -112,6 +112,13 @@ status_string (WEdit * edit, char *s, int w)
unsigned int cur_utf = 0; unsigned int cur_utf = 0;
int cw = 1; int cw = 1;
static const char *lb_names[LB_NAMES] = {
"",
"LF",
"CRLF",
"CR"
};
/* /*
* If we are at the end of file, print <EOF>, * If we are at the end of file, print <EOF>,
* otherwise print the current character as is (if printable), * otherwise print the current character as is (if printable),
@ -151,7 +158,7 @@ status_string (WEdit * edit, char *s, int w)
/* The field lengths just prevent the status line from shortening too much */ /* The field lengths just prevent the status line from shortening too much */
if (simple_statusbar) if (simple_statusbar)
g_snprintf (s, w, g_snprintf (s, w,
"%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s", "%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s %s",
edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-',
edit->modified ? 'M' : '-', edit->modified ? 'M' : '-',
macro_index < 0 ? '-' : 'R', macro_index < 0 ? '-' : 'R',
@ -161,14 +168,14 @@ status_string (WEdit * edit, char *s, int w)
edit->total_lines + 1, edit->curs1, edit->last_byte, byte_str, edit->total_lines + 1, edit->curs1, edit->last_byte, byte_str,
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
mc_global.source_codepage >= mc_global.source_codepage >=
0 ? get_codepage_id (mc_global.source_codepage) : "" 0 ? get_codepage_id (mc_global.source_codepage) : "",
#else #else
"" "",
#endif #endif
); lb_names[edit->lb]);
else else
g_snprintf (s, w, g_snprintf (s, w,
"[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s", "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s %s",
edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-',
edit->modified ? 'M' : '-', edit->modified ? 'M' : '-',
macro_index < 0 ? '-' : 'R', macro_index < 0 ? '-' : 'R',
@ -180,11 +187,11 @@ status_string (WEdit * edit, char *s, int w)
edit->total_lines + 1, edit->curs1, edit->last_byte, byte_str, edit->total_lines + 1, edit->curs1, edit->last_byte, byte_str,
#ifdef HAVE_CHARSET #ifdef HAVE_CHARSET
mc_global.source_codepage >= mc_global.source_codepage >=
0 ? get_codepage_id (mc_global.source_codepage) : "" 0 ? get_codepage_id (mc_global.source_codepage) : "",
#else #else
"" "",
#endif #endif
); lb_names[edit->lb]);
} }
/* --------------------------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------------------------- */