用PHP + XlsWriter实现百万级数据导入导出,怎么做?
针对百万级数据的导入导出,使用 PHP + XlsWriter 时需要考虑性能优化和内存管理。以下是我的完整解决方案:
一、核心优化策略
1. 导出优化方案
<?php

declare(strict_types=1);

/**
 * Streams a very large (million-row scale) dataset into an XLSX file with the
 * xlswriter (Vtiful) extension, writing fixed-size chunks so memory stays flat,
 * then streams the finished file to the client in 1 MB slices.
 */
class LargeDataExporter
{
    /** @var array{memory_limit:string, max_execution_time:int, chunk_size:int, temp_dir:string} */
    private $config = [
        'memory_limit'       => '2048M',
        'max_execution_time' => 0,
        'chunk_size'         => 50000,
        'temp_dir'           => __DIR__ . '/temp/',
    ];

    /**
     * Export a data source to an Excel file and stream it as a download.
     *
     * @param object $dataSource any object exposing getChunk(int $offset, int $limit): array
     * @param string $filename   bare target file name, e.g. "report.xlsx"
     */
    public function exportToExcel($dataSource, $filename)
    {
        // 1. Relax PHP limits for a long-running export.
        ini_set('memory_limit', $this->config['memory_limit']);
        set_time_limit(0);

        // 2. xlswriter resolves file names against the configured 'path', so
        //    pass only the bare name (the original passed the full path,
        //    which duplicated the temp dir in the resulting file path).
        $excel = new \Vtiful\Kernel\Excel([
            'path' => $this->config['temp_dir'],
        ]);
        $fileObject = $excel->fileName($filename, 'sheet1');

        // Header row.
        $headers = ['ID', '姓名', '邮箱', '电话', '地址'];
        $fileObject->header($headers);

        // 3. Fetch and write the data in chunks so that only one chunk is
        //    resident in memory at any time.
        $offset = 0;
        while ($chunkData = $dataSource->getChunk($offset, $this->config['chunk_size'])) {
            $fileObject->data($chunkData);
            $offset += $this->config['chunk_size'];

            // Release the chunk before fetching the next one.
            unset($chunkData);
            gc_collect_cycles();
        }

        // 4. Flush the workbook to disk; output() returns the absolute path.
        $filePath = $fileObject->output();

        // 5. Stream the finished file to the client.
        $this->streamDownload($filePath);
    }

    /**
     * Send a file to the client in 1 MB chunks, then delete it.
     *
     * @param string $filePath absolute path of the file to stream
     * @throws \RuntimeException when the file cannot be opened
     */
    private function streamDownload($filePath)
    {
        $handle = fopen($filePath, 'rb');
        if ($handle === false) {
            // Without this guard, feof(false) below would loop forever.
            throw new \RuntimeException("无法打开文件: {$filePath}");
        }

        header('Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
        header('Content-Disposition: attachment;filename="' . basename($filePath) . '"');
        header('Cache-Control: max-age=0');
        header('Content-Length: ' . filesize($filePath));

        $chunkSize = 1024 * 1024; // 1 MB per read keeps peak memory low
        while (!feof($handle)) {
            echo fread($handle, $chunkSize);
            if (ob_get_level() > 0) {
                ob_flush(); // only flush a buffer that actually exists
            }
            flush();
        }
        fclose($handle);

        // Remove the temp file once fully sent.
        unlink($filePath);
    }
}
2. 导入优化方案
<?php

declare(strict_types=1);

/**
 * Imports a large XLSX file in chunks, validating rows and inserting them
 * with multi-VALUES batch statements via PDO, so that both memory use and
 * statement count stay bounded.
 */
class LargeDataImporter
{
    /** @var array{chunk_size:int, batch_insert_size:int, temp_dir:string} */
    private $config = [
        'chunk_size'        => 20000,
        'batch_insert_size' => 1000,
        'temp_dir'          => __DIR__ . '/temp/',
    ];

    /** @var \PDO|null connection used by batchInsert(); required for imports */
    private $pdo;

    /**
     * @param \PDO|null $pdo database handle (the original code read an
     *                       undeclared $this->pdo, which was always null)
     */
    public function __construct(?\PDO $pdo = null)
    {
        $this->pdo = $pdo;
    }

    /**
     * Import an Excel file and report what happened.
     *
     * @param string $filePath path to the .xlsx file
     * @return array{total:int, success:int, errors:int[]} data-row total,
     *         rows inserted, and 0-based row numbers that failed validation
     * @throws \Exception on any failure during the import
     */
    public function importFromExcel($filePath)
    {
        $excel = new \Vtiful\Kernel\Excel(['path' => '']);

        try {
            // Count rows. NOTE(review): getSheetData() with no limit loads the
            // whole sheet just to count it — confirm whether this xlswriter
            // version offers a cheaper row count for truly huge files.
            $allRows   = $excel->openFile($filePath)->openSheet()->getSheetData();
            $totalRows = count($allRows);
            unset($allRows); // drop the full-sheet copy immediately

            $successCount = 0;
            $errorRows    = [];

            // Row 0 is the header; data starts at row 1.
            for ($startRow = 1; $startRow < $totalRows; $startRow += $this->config['chunk_size']) {
                $chunkData = $this->readChunk($filePath, $startRow, $this->config['chunk_size']);

                // Accumulate valid rows and flush in fixed-size batches.
                $batchData = [];
                foreach ($chunkData as $rowIndex => $row) {
                    if ($this->validateRow($row)) {
                        $batchData[] = $this->formatRow($row);

                        if (count($batchData) >= $this->config['batch_insert_size']) {
                            $this->batchInsert($batchData);
                            $successCount += count($batchData);
                            $batchData = [];
                        }
                    } else {
                        // Absolute (0-based) sheet row number of the bad row.
                        $errorRows[] = $startRow + $rowIndex;
                    }
                }

                // Flush the final partial batch of this chunk.
                if (!empty($batchData)) {
                    $this->batchInsert($batchData);
                    $successCount += count($batchData);
                }

                unset($chunkData, $batchData);
                gc_collect_cycles();

                $this->logProgress($startRow, $totalRows);
            }

            return [
                'total'   => max(0, $totalRows - 1), // exclude the header row
                'success' => $successCount,
                'errors'  => $errorRows,
            ];
        } catch (\Exception $e) {
            // Chain the original exception so the root cause is preserved.
            throw new \Exception("导入失败: " . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Read one chunk of data rows.
     *
     * Skips $startRow rows: the header plus every data row already consumed.
     * (The original skipped $startRow - 1 rows, so the first chunk re-read
     * the header row as data.)
     */
    private function readChunk($filePath, $startRow, $chunkSize)
    {
        $excel = new \Vtiful\Kernel\Excel(['path' => '']);

        return $excel->openFile($filePath)
            ->openSheet()
            ->setSkipRows($startRow)
            ->getSheetData($chunkSize);
    }

    /**
     * Basic row validation; override with real business rules.
     * Accepts a row containing at least one non-empty cell.
     */
    protected function validateRow($row)
    {
        foreach ((array) $row as $cell) {
            if ($cell !== null && $cell !== '') {
                return true;
            }
        }

        return false;
    }

    /**
     * Normalize a row before insertion; override with real mapping logic.
     */
    protected function formatRow($row)
    {
        return array_values((array) $row);
    }

    /**
     * Record import progress; override to push progress to Redis / the UI.
     */
    protected function logProgress($startRow, $totalRows)
    {
        error_log(sprintf('import progress: %d/%d rows', min($startRow, $totalRows), $totalRows));
    }

    /**
     * Insert many rows with a single multi-VALUES prepared statement.
     *
     * @param array<int, array> $data formatted rows, all with the same arity
     * @throws \RuntimeException when no PDO connection was provided
     */
    private function batchInsert($data)
    {
        if (!$this->pdo instanceof \PDO) {
            throw new \RuntimeException('缺少 PDO 连接,无法执行批量插入');
        }

        $placeholders = [];
        $values       = [];
        foreach ($data as $row) {
            $placeholders[] = '(' . implode(',', array_fill(0, count($row), '?')) . ')';
            // array_push + spread avoids the O(n^2) array_merge-in-a-loop.
            array_push($values, ...array_values($row));
        }

        $sql = 'INSERT INTO large_table (field1, field2, field3) VALUES '
            . implode(',', $placeholders)
            . ' ON DUPLICATE KEY UPDATE updated_at = NOW()';

        $stmt = $this->pdo->prepare($sql);
        $stmt->execute($values);
    }
}
二、数据库优化配置
-- 1. Table structure suited to bulk loading
--    (COMPRESSED row format trades CPU for smaller on-disk pages)
CREATE TABLE large_data (
    id         BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    name       VARCHAR(255),
    email      VARCHAR(255),
    phone      VARCHAR(50),
    address    TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    INDEX idx_email (email),
    INDEX idx_created (created_at)
) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;

-- 2. Temporarily relax checks for the duration of the import
SET autocommit = 0;
SET unique_checks = 0;
SET foreign_key_checks = 0;
-- ... run the bulk INSERTs here ...
SET unique_checks = 1;
SET foreign_key_checks = 1;
COMMIT;
三、完整封装类
<?php

declare(strict_types=1);

/**
 * High-volume Excel import/export helper built on the xlswriter (Vtiful)
 * extension: chunked streaming export, task-file based async export, and
 * adaptive chunked import with memory back-pressure.
 */
class ExcelMassHandler
{
    /** @var \Vtiful\Kernel\Excel writer bound to the temp directory */
    private $excel;

    /** @var mixed database handle (not used directly here; for subclasses) */
    private $db;

    /** @var array runtime configuration; see __construct() for defaults */
    private $config;

    /**
     * @param array $config overrides for chunk sizes, temp dir, limits, and
     *                      an optional 'progress_callback' callable
     */
    public function __construct(array $config = [])
    {
        $this->config = array_merge([
            'export_chunk' => 50000,
            'import_chunk' => 20000,
            'batch_insert' => 1000,
            'temp_dir'     => sys_get_temp_dir() . '/excel_mass/',
            'memory_limit' => '2048M',
            'time_limit'   => 0,
        ], $config);

        $this->initEnvironment();
        $this->initDirectories();
        $this->excel = new \Vtiful\Kernel\Excel(['path' => $this->config['temp_dir']]);
    }

    /** Raise PHP limits and drop any active output buffer. */
    private function initEnvironment()
    {
        ini_set('memory_limit', $this->config['memory_limit']);
        set_time_limit($this->config['time_limit']);

        // Unbuffered output so downloads stream instead of accumulating.
        if (ob_get_level()) {
            ob_end_clean();
        }
    }

    /** Ensure the temp directory exists; fail loudly if it cannot be made. */
    private function initDirectories()
    {
        if (!is_dir($this->config['temp_dir'])
            && !mkdir($this->config['temp_dir'], 0777, true)
            && !is_dir($this->config['temp_dir'])  // re-check: mkdir race
        ) {
            throw new \RuntimeException('无法创建临时目录: ' . $this->config['temp_dir']);
        }
    }

    /**
     * Chunked streaming export.
     *
     * @param callable $dataCallback fn(int $page, int $chunkSize): array —
     *                               returns one page of rows; empty = done
     * @param array    $headers      header row labels
     * @param string   $filename     desired download name (not used for the
     *                               temp file; kept for API compatibility)
     * @return string absolute path of the generated file, ready to download
     * @throws \Exception re-thrown after cleaning up the partial temp file
     */
    public function streamExport(callable $dataCallback, array $headers, $filename)
    {
        // xlswriter resolves names against the configured 'path'; keep the
        // bare name for the writer and the absolute path for the caller.
        $tempName = uniqid('export_') . '.xlsx';
        $tempFile = $this->config['temp_dir'] . $tempName;

        try {
            $fileObject = $this->excel->fileName($tempName)->header($headers);

            $page    = 1;
            $written = 0;
            do {
                $data = $dataCallback($page, $this->config['export_chunk']);
                // Track the row count in its own variable: the original
                // unset($data) before `while (!empty($data))`, which ended
                // the loop after the very first chunk.
                $rowCount = is_array($data) ? count($data) : 0;

                if ($rowCount > 0) {
                    $fileObject->data($data);
                    $written += $rowCount;
                    $page++;

                    if (isset($this->config['progress_callback'])) {
                        // Report rows actually written (the original reported
                        // page * chunk_size, overstating the final chunk).
                        call_user_func($this->config['progress_callback'], $written);
                    }

                    unset($data);
                    gc_collect_cycles();
                }
            } while ($rowCount > 0);

            $fileObject->output();

            return $tempFile;
        } catch (\Exception $e) {
            // Never leave a half-written workbook behind.
            if (file_exists($tempFile)) {
                unlink($tempFile);
            }
            throw $e;
        }
    }

    /**
     * Asynchronous export (recommended for extremely large datasets):
     * records a task file and hands the work to a background worker.
     *
     * @return string task id the client can poll for status
     */
    public function asyncExport(callable $dataCallback, array $headers, $filename)
    {
        $taskId   = uniqid('export_');
        $taskFile = $this->config['temp_dir'] . $taskId . '.task';

        // Persist the task descriptor for the worker / status polling.
        file_put_contents($taskFile, json_encode([
            'filename'   => $filename,
            'headers'    => $headers,
            'status'     => 'pending',
            'created_at' => time(),
        ]));

        // NOTE(review): dispatchAsyncTask() is not defined in this class —
        // it must be provided (message queue, cron worker, etc.).
        $this->dispatchAsyncTask($taskId, $dataCallback);

        return $taskId;
    }

    /**
     * Chunked import with adaptive chunk sizing.
     *
     * @param string   $filePath        path to the .xlsx file
     * @param callable $processCallback fn(array $chunkData, int $startRow)
     * @param bool     $skipHeader      skip the first row as a header
     * @return array{total:int, processed:int}
     * @throws \Exception when the file does not exist
     */
    public function chunkImport($filePath, callable $processCallback, $skipHeader = true)
    {
        if (!file_exists($filePath)) {
            throw new \Exception("文件不存在");
        }

        $fileSize  = filesize($filePath);
        $maxMemory = $this->convertToBytes(ini_get('memory_limit'));
        // Size chunks from the available memory headroom.
        $chunkSize = $this->calculateChunkSize($fileSize, $maxMemory);

        // Count rows with a throwaway reader: getSheetData() consumes the
        // sheet cursor, so the counting handle cannot be reused for chunks.
        $counter   = new \Vtiful\Kernel\Excel(['path' => '']);
        $totalRows = count($counter->openFile($filePath)->openSheet()->getSheetData());
        unset($counter);

        $reader    = new \Vtiful\Kernel\Excel(['path' => '']);
        $startRow  = $skipHeader ? 1 : 0;
        $processed = 0;

        while ($startRow < $totalRows) {
            // Re-open per chunk so setSkipRows() always counts from row 0.
            $chunkData = $reader->openFile($filePath)
                ->openSheet()
                ->setSkipRows($startRow)
                ->getSheetData($chunkSize);

            if (empty($chunkData)) {
                break;
            }

            $processCallback($chunkData, $startRow);

            $readCount  = count($chunkData);
            $processed += $readCount;
            // Advance by rows actually read — advancing by $chunkSize (as the
            // original did) skips rows once the chunk size shrinks below it.
            $startRow  += $readCount;

            $progress = round(($processed / max(1, $totalRows)) * 100, 2);
            $this->log("处理进度: {$progress}% ({$processed}/{$totalRows})");

            unset($chunkData);
            gc_collect_cycles();

            // Back off when memory pressure passes 70% of the limit.
            if ($maxMemory > 0 && memory_get_usage(true) > $maxMemory * 0.7) {
                $chunkSize = max(1000, intdiv($chunkSize, 2));
            }
        }

        return [
            'total'     => $totalRows - ($skipHeader ? 1 : 0),
            'processed' => $processed,
        ];
    }

    /**
     * Estimate a safe chunk size from the memory budget.
     * ($fileSize is accepted for future refinement but not yet used.)
     */
    private function calculateChunkSize($fileSize, $maxMemory)
    {
        // Rough model: ~2 KB per row, keep a 100 MB safety margin.
        $safeMemory       = max(0, $maxMemory - 100 * 1024 * 1024);
        $estimatedRowSize = 2 * 1024;
        $calculated       = (int) floor($safeMemory / $estimatedRowSize);

        // Clamp to a sane [1000, 100000] window.
        return min(max(1000, $calculated), 100000);
    }

    /**
     * Convert a php.ini shorthand size ("2048M", "1G") to bytes.
     * "-1" (unlimited memory_limit) maps to PHP_INT_MAX rather than to a
     * negative number, which would break every comparison against it.
     */
    private function convertToBytes($value)
    {
        if ((int) $value === -1) {
            return PHP_INT_MAX;
        }

        $unit   = strtolower(substr($value, -1));
        $number = (int) substr($value, 0, -1);

        switch ($unit) {
            case 'g': return $number * 1024 * 1024 * 1024;
            case 'm': return $number * 1024 * 1024;
            case 'k': return $number * 1024;
            default:  return (int) $value;
        }
    }

    /** Append a timestamped line to the import log in the temp directory. */
    private function log($message)
    {
        error_log('[' . date('Y-m-d H:i:s') . '] ' . $message . PHP_EOL, 3, $this->config['temp_dir'] . 'import.log');
    }
}
四、使用示例
<?php

declare(strict_types=1);

// ---------------------------------------------------------------------------
// 1. Export example
// ---------------------------------------------------------------------------
$exporter = new ExcelMassHandler();

// Page-by-page data provider; $db is an existing PDO connection.
$dataCallback = function ($page, $chunkSize) use ($db) {
    $offset = ($page - 1) * $chunkSize;

    // Bind LIMIT/OFFSET explicitly as integers: execute([...]) sends them as
    // strings, which MySQL rejects in LIMIT under emulated prepares.
    $stmt = $db->prepare('SELECT * FROM large_table LIMIT ? OFFSET ?');
    $stmt->bindValue(1, (int) $chunkSize, PDO::PARAM_INT);
    $stmt->bindValue(2, (int) $offset, PDO::PARAM_INT);
    $stmt->execute();

    return $stmt->fetchAll(PDO::FETCH_ASSOC);
};

$headers = ['ID', '姓名', '邮箱', '电话', '创建时间'];
$file = $exporter->streamExport($dataCallback, $headers, '百万数据导出.xlsx');

// ---------------------------------------------------------------------------
// 2. Import example
// ---------------------------------------------------------------------------
$importer = new ExcelMassHandler([
    'import_chunk' => 50000,
    'batch_insert' => 2000,
]);

// NOTE: a plain top-level closure has no $this, so the original's
// $this->validateRow() calls were fatal errors here; use free functions
// (validateRow / batchInsertToDB, defined by the caller) instead.
$result = $importer->chunkImport('large_data.xlsx', function ($chunkData) use ($db) {
    $validData = [];
    foreach ($chunkData as $row) {
        if (validateRow($row)) {           // user-defined row validator
            $validData[] = $row;
        }
    }

    if (!empty($validData)) {
        batchInsertToDB($db, $validData);  // user-defined bulk insert
    }

    return count($validData);
});
五、性能优化建议
1. 服务器配置
; php.ini 配置
memory_limit = 2048M
max_execution_time = 0
max_input_time = -1
post_max_size = 1024M
upload_max_filesize = 1024M
; OPcache 优化
opcache.enable=1
opcache.memory_consumption=256
opcache.max_accelerated_files=10000
2. Nginx 配置
client_max_body_size 1024m;
proxy_read_timeout 600s;
fastcgi_read_timeout 600s;
3. 监控建议
- 实时监控内存使用情况
- 使用 Redis 记录导入导出进度
- 添加断点续传功能
- 实现任务队列处理超大数据
六、注意事项
- 文件分割:超过500万条数据建议分割成多个文件
- 内存监控:实时监控内存使用,防止溢出
- 超时处理:设置合理的超时时间,使用心跳机制
- 错误恢复:记录失败行号,支持重新导入
- 格式验证:导入前验证数据格式和完整性
这个方案通过分块处理、流式读写、内存优化等技术,可以有效处理百万级数据的导入导出任务。