rewrite file opening code using mmap, opening big files is much much faster

This commit is contained in:
Maxime Coste 2012-11-23 18:43:10 +01:00
parent c32a7b9b74
commit 9c99c238e8

View File

@ -8,6 +8,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h> #include <fcntl.h>
#include <unistd.h> #include <unistd.h>
#include <cstring> #include <cstring>
@ -80,60 +81,58 @@ Buffer* create_buffer_from_file(const String& filename)
throw file_access_error(filename, strerror(errno)); throw file_access_error(filename, strerror(errno));
} }
auto close_fd = on_scope_end([fd]{ close(fd); }); struct stat st;
fstat(fd, &st);
const char* data = (const char*)mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
auto cleanup = on_scope_end([&]{ munmap((void*)data, st.st_size); close(fd); });
if (Buffer* buffer = BufferManager::instance().get_buffer(filename)) if (Buffer* buffer = BufferManager::instance().get_buffer(filename))
delete buffer; delete buffer;
Buffer* buffer = new Buffer(filename, Buffer::Flags::File | Buffer::Flags::NoUndo); const char* pos = data;
String content;
char buf[256];
bool crlf = false; bool crlf = false;
bool bom = false; bool bom = false;
bool at_file_begin = true; if (st.st_size >= 3 and
while (true) data[0] == '\xEF' and data[1] == '\xBB' and data[2] == '\xBF')
{
ssize_t size = read(fd, buf, 256);
if (size == -1 or size == 0)
break;
ssize_t pos = 0;
// detect utf-8 byte order mark
if (at_file_begin and size >= 3 and
buf[0] == '\xEF' and buf[1] == '\xBB' and buf[2] == '\xBF')
{ {
bom = true; bom = true;
pos = 3; pos = data + 3;
} }
ssize_t start = pos;
while (pos < size+1) std::vector<String> lines;
const char* end = data + st.st_size;
while (pos < end)
{ {
if (buf[pos] == '\r' or pos == size) const char* line_end = pos;
while (line_end < end and *line_end != '\r' and *line_end != '\n')
++line_end;
// this should happen only when opening a file which has no
// end of line as last character.
if (line_end == end)
{
lines.emplace_back(pos, line_end);
lines.back() += '\n';
break;
}
lines.emplace_back(pos, line_end + 1);
lines.back().back() = '\n';
if (line_end+1 != end and *line_end == '\r' and *line_end+1 == '\n')
{ {
if (buf[pos] == '\r')
crlf = true; crlf = true;
pos = line_end + 2;
buffer->insert(buffer->end()-1, String(buf+start, buf+pos));
start = pos+1;
} }
++pos; else
} pos = line_end + 1;
at_file_begin = false;
} }
Buffer* buffer = new Buffer(filename, Buffer::Flags::File, std::move(lines));
OptionManager& options = buffer->options(); OptionManager& options = buffer->options();
options.set_option("eolformat", Option(crlf ? "crlf" : "lf")); options.set_option("eolformat", Option(crlf ? "crlf" : "lf"));
options.set_option("BOM", Option(bom ? "utf-8" : "no")); options.set_option("BOM", Option(bom ? "utf-8" : "no"));
// if the file ended with a \n, remove the \n added by the buffer
if (*(buffer->end() - 2) == '\n')
buffer->erase(buffer->end() - 1, buffer->end());
// enable undo data recording
buffer->flags() &= ~Buffer::Flags::NoUndo;
return buffer; return buffer;
} }