Files
DeepSpeed/csrc/aio/py_lib/deepspeed_py_aio.cpp
Logan Adams 297a6840e1 Update clang-format version from 16 to 18. (#5839)
We used a slightly old version of clang-format before, this caused
issues when folks installed the latest via apt or similar rather than
python to try and fix their formatting issues. Plus installing older
versions is a pain and the formatting style of the newer version seems
better?
2024-08-06 09:14:21 -07:00

124 lines
4.2 KiB
C++

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
/*
Copyright 2020 The Microsoft DeepSpeed Team
Licensed under the MIT license.
Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "deepspeed_py_aio.h"
using namespace std;
using namespace std::chrono;
#define DEBUG_DS_AIO_READ 0
#define DEBUG_DS_AIO_WRITE 0
static const std::string c_library_name = "deepspeed_aio";
int deepspeed_py_aio_write(const torch::Tensor& buffer,
const char* filename,
const int block_size,
const int queue_depth,
const bool single_submit,
const bool overlap_events,
const bool validate)
{
const auto start_time = std::chrono::high_resolution_clock::now();
deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
const auto fd = open_file(filename, false);
if (fd == -1) { return -1; }
auto write_buffer = (char*)buffer.data_ptr();
const auto num_write_bytes = static_cast<long long int>(buffer.nbytes());
std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_write_bytes, write_buffer));
std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
if (config._overlap_events) {
do_aio_operation_overlap(false, aio_ctxt, xfer_ctxt, &config, nullptr);
} else {
do_aio_operation_sequential(false, aio_ctxt, xfer_ctxt, &config, nullptr);
}
const std::chrono::duration<double> aio_time =
std::chrono::high_resolution_clock::now() - start_time;
close(fd);
if (validate) { validate_aio_operation(false, filename, write_buffer, num_write_bytes); }
const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}
int deepspeed_py_aio_read(torch::Tensor& buffer,
const char* filename,
const int block_size,
const int queue_depth,
const bool single_submit,
const bool overlap_events,
const bool validate)
{
const auto start_time = std::chrono::high_resolution_clock::now();
long long num_file_bytes;
if (-1 == get_file_size(filename, num_file_bytes)) {
const auto error_code = errno;
report_file_error(filename, " fstat for read", error_code);
return -1;
}
deepspeed_aio_config_t config(block_size, queue_depth, single_submit, overlap_events, false);
const auto fd = open_file(filename, true);
if (fd == -1) { return -1; }
auto read_buffer = (char*)buffer.data_ptr();
assert(static_cast<long long int>(buffer.nbytes()) == num_file_bytes);
std::unique_ptr<io_xfer_ctxt> xfer_ctxt(new io_xfer_ctxt(fd, 0, num_file_bytes, read_buffer));
std::unique_ptr<aio_context> aio_ctxt(new aio_context(config._block_size, config._queue_depth));
if (config._overlap_events) {
do_aio_operation_overlap(true, aio_ctxt, xfer_ctxt, &config, nullptr);
} else {
do_aio_operation_sequential(true, aio_ctxt, xfer_ctxt, &config, nullptr);
}
const std::chrono::duration<double> aio_time =
std::chrono::high_resolution_clock::now() - start_time;
close(fd);
if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
const std::chrono::duration<double> fn_time =
std::chrono::high_resolution_clock::now() - start_time;
std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
<< " call = " << fn_time.count() * 1e6 << std::endl;
return 0;
}