Detect the UTF-8 byte order mark when reading a file into a buffer, and write it back when the buffer is saved

This commit is contained in:
Maxime Coste 2012-08-10 18:48:21 +02:00
parent 7d3675fe33
commit 44621bff11
2 changed files with 22 additions and 6 deletions

View File

@ -92,14 +92,25 @@ Buffer* create_buffer_from_file(const String& filename)
String content; String content;
char buf[256]; char buf[256];
bool crlf = false; bool crlf = false;
bool bom = false;
bool at_file_begin = true;
while (true) while (true)
{ {
ssize_t size = read(fd, buf, 256); ssize_t size = read(fd, buf, 256);
if (size == -1 or size == 0) if (size == -1 or size == 0)
break; break;
ssize_t start = 0; ssize_t pos = 0;
for (ssize_t pos = 0; pos < size+1; ++pos) // detect utf-8 byte order mark
if (at_file_begin and size >= 3 and
buf[0] == '\xEF' and buf[1] == '\xBB' and buf[2] == '\xBF')
{
bom = true;
pos = 3;
}
ssize_t start = pos;
while (pos < size+1)
{ {
if (buf[pos] == '\r' or pos == size) if (buf[pos] == '\r' or pos == size)
{ {
@ -109,14 +120,15 @@ Buffer* create_buffer_from_file(const String& filename)
buffer->modify(Modification::make_insert(buffer->end(), String(buf+start, buf+pos))); buffer->modify(Modification::make_insert(buffer->end(), String(buf+start, buf+pos)));
start = pos+1; start = pos+1;
} }
++pos;
} }
at_file_begin = false;
} }
close(fd); close(fd);
if (crlf) OptionManager& option_manager = buffer->option_manager();
buffer->option_manager().set_option("eolformat", Option("crlf")); option_manager.set_option("eolformat", Option(crlf ? "crlf" : "lf"));
else option_manager.set_option("BOM", Option(bom ? "utf-8" : "no"));
buffer->option_manager().set_option("eolformat", Option("lf"));
// it never happened, buffer always was like that // it never happened, buffer always was like that
buffer->reset_undo_data(); buffer->reset_undo_data();
@ -153,6 +165,9 @@ void write_buffer_to_file(const Buffer& buffer, const String& filename)
if (fd == -1) if (fd == -1)
throw file_access_error(filename, strerror(errno)); throw file_access_error(filename, strerror(errno));
if (buffer.option_manager()["BOM"].as_string() == "utf-8")
::write(fd, "\xEF\xBB\xBF", 3);
for (size_t i = 0; i < buffer.line_count(); ++i) for (size_t i = 0; i < buffer.line_count(); ++i)
{ {
// end of lines are written according to eolformat but always // end of lines are written according to eolformat but always

View File

@ -95,6 +95,7 @@ GlobalOptionManager::GlobalOptionManager()
{ {
set_option("tabstop", Option(8)); set_option("tabstop", Option(8));
set_option("eolformat", Option("lf")); set_option("eolformat", Option("lf"));
set_option("BOM", Option("no"));
} }
} }