Init indexed file

Eli-Class committed 1d13e9fc3e on 2026-01-27 02:30:42 +00:00
22 changed files with 2812 additions and 0 deletions

lib/dat/binary-helpers.ts (new file)
@@ -0,0 +1,92 @@
// lib/dat/binary-helpers.ts
/** Accumulates little-endian binary fields into an in-memory buffer. */
export class BinaryWriter {
  private chunks: Buffer[] = [];
  writeUInt32(value: number): this {
    const buf = Buffer.alloc(4);
    buf.writeUInt32LE(value, 0);
    this.chunks.push(buf);
    return this;
  }
  writeDouble(value: number): this {
    const buf = Buffer.alloc(8);
    buf.writeDoubleLE(value, 0);
    this.chunks.push(buf);
    return this;
  }
  writeBigUInt64(value: bigint): this {
    const buf = Buffer.alloc(8);
    buf.writeBigUInt64LE(value, 0);
    this.chunks.push(buf);
    return this;
  }
  writeString(value: string): this {
    // Length-prefixed UTF-8: a u32 byte length followed by the raw bytes.
    const strBuf = Buffer.from(value, 'utf8');
    this.writeUInt32(strBuf.length);
    this.chunks.push(strBuf);
    return this;
  }
  writeNumberArray(values: number[]): this {
    this.writeUInt32(values.length);
    for (const v of values) this.writeDouble(v);
    return this;
  }
  writeStringArray(values: string[]): this {
    this.writeUInt32(values.length);
    for (const v of values) this.writeString(v);
    return this;
  }
  toBuffer(): Buffer {
    return Buffer.concat(this.chunks);
  }
}

/** Reads fields sequentially from a buffer, mirroring BinaryWriter's layout. */
export class BinaryReader {
  private offset = 0;
  constructor(private buf: Buffer) {}
  readUInt32(): number {
    const v = this.buf.readUInt32LE(this.offset);
    this.offset += 4;
    return v;
  }
  readDouble(): number {
    const v = this.buf.readDoubleLE(this.offset);
    this.offset += 8;
    return v;
  }
  readBigUInt64(): bigint {
    const v = this.buf.readBigUInt64LE(this.offset);
    this.offset += 8;
    return v;
  }
  readString(): string {
    const len = this.readUInt32();
    const v = this.buf.toString('utf8', this.offset, this.offset + len);
    this.offset += len;
    return v;
  }
  readNumberArray(): number[] {
    const len = this.readUInt32();
    const arr: number[] = [];
    for (let i = 0; i < len; i++) arr.push(this.readDouble());
    return arr;
  }
  readStringArray(): string[] {
    const len = this.readUInt32();
    const arr: string[] = [];
    for (let i = 0; i < len; i++) arr.push(this.readString());
    return arr;
  }
}
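
A quick round-trip sketch of the helpers above (illustration only, not part of the diff):

// Write a few length-prefixed fields, then read them back in the same order.
const w = new BinaryWriter();
w.writeUInt32(42).writeString('hello').writeNumberArray([1.5, 2.5]);
const r = new BinaryReader(w.toBuffer());
r.readUInt32();      // => 42
r.readString();      // => 'hello'
r.readNumberArray(); // => [1.5, 2.5]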

lib/dat/constants.ts (new file)
@@ -0,0 +1,5 @@
// lib/dat/constants.ts
export const DATA_MAGIC = 'DATA';
export const DATA_VERSION = 1;
export const DATA_HEADER_SIZE = 64;
export const RECORD_HEADER_SIZE = 8;

lib/dat/index.ts (new file)
@@ -0,0 +1,7 @@
// lib/dat/index.ts
export { DataWriter } from './writer.js';
export { DataReader } from './reader.js';
export { DataProtocol } from './protocol.js';
export * from './types.js';
export * from './constants.js';
export * from './serializers.js';

lib/dat/protocol.ts (new file)
@@ -0,0 +1,80 @@
// lib/dat/protocol.ts
import { DATA_MAGIC, DATA_VERSION, DATA_HEADER_SIZE, RECORD_HEADER_SIZE } from './constants.js';
import { crc32 } from '../idx/index.js';
import type { Serializer } from './types.js';

export interface DataHeader {
  magic: string;
  version: number;
  createdAt: bigint;
  fileSize: bigint;
  recordCount: number;
  reserved: Buffer;
}

export class DataProtocol {
  static createHeader(): Buffer {
    const buf = Buffer.alloc(DATA_HEADER_SIZE);
    buf.write(DATA_MAGIC, 0, 4, 'ascii');                   // bytes 0-3: magic 'DATA'
    buf.writeUInt32LE(DATA_VERSION, 4);                     // bytes 4-7: format version
    buf.writeBigUInt64LE(BigInt(Date.now()) * 1000000n, 8); // bytes 8-15: created-at, ns since epoch
    buf.writeBigUInt64LE(BigInt(DATA_HEADER_SIZE), 16);     // bytes 16-23: file size (header only, initially)
    buf.writeUInt32LE(0, 24);                               // bytes 24-27: record count; 28-63 reserved
    return buf;
  }
  static readHeader(buf: Buffer): DataHeader {
    return {
      magic: buf.toString('ascii', 0, 4),
      version: buf.readUInt32LE(4),
      createdAt: buf.readBigUInt64LE(8),
      fileSize: buf.readBigUInt64LE(16),
      recordCount: buf.readUInt32LE(24),
      reserved: buf.subarray(28, 64),
    };
  }
  static updateHeader(buf: Buffer, fileSize: bigint, recordCount: number): void {
    buf.writeBigUInt64LE(fileSize, 16);
    buf.writeUInt32LE(recordCount, 24);
  }
  // Record layout: [u32 payload length][u32 CRC32 of payload][payload bytes].
  static serializeRecord<T>(data: T, serializer: Serializer<T>): Buffer {
    const dataBytes = serializer.serialize(data);
    const totalLen = RECORD_HEADER_SIZE + dataBytes.length;
    const buf = Buffer.alloc(totalLen);
    dataBytes.copy(buf, RECORD_HEADER_SIZE);
    buf.writeUInt32LE(dataBytes.length, 0);
    const checksum = crc32(buf, RECORD_HEADER_SIZE, totalLen);
    buf.writeUInt32LE(checksum, 4);
    return buf;
  }
  // Returns null on a truncated record; throws on checksum corruption.
  static deserializeRecord<T>(
    buf: Buffer,
    offset: number,
    serializer: Serializer<T>
  ): { data: T; length: number } | null {
    if (offset + RECORD_HEADER_SIZE > buf.length) return null;
    const dataLen = buf.readUInt32LE(offset);
    const storedChecksum = buf.readUInt32LE(offset + 4);
    const totalLen = RECORD_HEADER_SIZE + dataLen;
    if (offset + totalLen > buf.length) return null;
    const calcChecksum = crc32(buf, offset + RECORD_HEADER_SIZE, offset + totalLen);
    if (calcChecksum !== storedChecksum) {
      throw new Error(`Checksum mismatch at offset ${offset}`);
    }
    const dataBytes = buf.subarray(offset + RECORD_HEADER_SIZE, offset + totalLen);
    const data = serializer.deserialize(dataBytes);
    return { data, length: totalLen };
  }
}
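
For illustration, a record round-trip using the JSON serializer from serializers.ts, deserializing from offset 0 of a standalone buffer:

// Frame one record ([u32 length][u32 crc32][payload]) and read it back.
const serializer = jsonSerializer<{ id: number }>();
const record = DataProtocol.serializeRecord({ id: 7 }, serializer);
const parsed = DataProtocol.deserializeRecord(record, 0, serializer);
// parsed => { data: { id: 7 }, length: RECORD_HEADER_SIZE + payload bytes }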

lib/dat/reader.ts (new file)
@@ -0,0 +1,213 @@
// lib/dat/reader.ts
import * as fs from 'node:fs';
import mmap from '@elilee/mmap-native';
import { DataProtocol, type DataHeader } from './protocol.js';
import { IndexReader } from '../idx/index.js';
import type { Serializer, DataEntry } from './types.js';

export class DataReader<T> {
  private fd: number | null = null;
  private buffer: Buffer | null = null;
  private header: DataHeader | null = null;
  private indexReader: IndexReader;
  private serializer: Serializer<T>;
  readonly dataPath: string;
  readonly indexPath: string;
  constructor(basePath: string, serializer: Serializer<T>) {
    this.dataPath = `${basePath}.dat`;
    this.indexPath = `${basePath}.idx`;
    this.serializer = serializer;
    this.indexReader = new IndexReader(this.indexPath);
  }
  open(): void {
    const stats = fs.statSync(this.dataPath);
    this.fd = fs.openSync(this.dataPath, 'r');
    // Map the whole data file read-only; pages are faulted in on demand.
    this.buffer = mmap.map(
      stats.size,
      mmap.PROT_READ,
      mmap.MAP_SHARED,
      this.fd,
      0
    );
    this.header = DataProtocol.readHeader(this.buffer);
    this.indexReader.open();
  }
  getHeader(): DataHeader {
    if (!this.header) throw new Error('Data file not opened');
    return this.header;
  }
  getBySequence(sequence: number): DataEntry<T> | null {
    if (!this.buffer) throw new Error('Data file not opened');
    const found = this.indexReader.binarySearchBySequence(sequence);
    if (!found) return null;
    const result = DataProtocol.deserializeRecord(
      this.buffer,
      Number(found.entry.offset),
      this.serializer
    );
    if (!result) return null;
    return {
      sequence: found.entry.sequence,
      timestamp: found.entry.timestamp,
      data: result.data,
    };
  }
  getByIndex(index: number): DataEntry<T> | null {
    if (!this.buffer) throw new Error('Data file not opened');
    const entry = this.indexReader.getEntry(index);
    if (!entry) return null;
    const result = DataProtocol.deserializeRecord(
      this.buffer,
      Number(entry.offset),
      this.serializer
    );
    if (!result) return null;
    return {
      sequence: entry.sequence,
      timestamp: entry.timestamp,
      data: result.data,
    };
  }
  getBulkData(startSeq: number, endSeq: number): DataEntry<T>[] {
    if (!this.buffer) throw new Error('Data file not opened');
    const results: DataEntry<T>[] = [];
    const indexHeader = this.indexReader.getHeader();
    // Binary-search to the first candidate, then scan forward until endSeq.
    const startIdx = this.findStartIndex(startSeq, indexHeader.validCount);
    for (let i = startIdx; i < indexHeader.validCount; i++) {
      const entry = this.indexReader.getEntry(i);
      if (!entry) continue;
      if (entry.sequence > endSeq) break;
      if (entry.sequence >= startSeq) {
        const result = DataProtocol.deserializeRecord(
          this.buffer,
          Number(entry.offset),
          this.serializer
        );
        if (result) {
          results.push({
            sequence: entry.sequence,
            timestamp: entry.timestamp,
            data: result.data,
          });
        }
      }
    }
    return results;
  }
  // Binary search for the first index whose sequence is >= targetSeq.
  private findStartIndex(targetSeq: number, validCount: number): number {
    let left = 0;
    let right = validCount - 1;
    let result = 0;
    while (left <= right) {
      const mid = Math.floor((left + right) / 2);
      const entry = this.indexReader.getEntry(mid);
      if (!entry) {
        right = mid - 1;
        continue;
      }
      if (entry.sequence >= targetSeq) {
        result = mid;
        right = mid - 1;
      } else {
        left = mid + 1;
      }
    }
    return result;
  }
  getBulkDataByTime(startTs: bigint, endTs: bigint): DataEntry<T>[] {
    if (!this.buffer) throw new Error('Data file not opened');
    const indexResults = this.indexReader.findByTimeRange(startTs, endTs);
    const results: DataEntry<T>[] = [];
    for (const { entry } of indexResults) {
      const result = DataProtocol.deserializeRecord(
        this.buffer,
        Number(entry.offset),
        this.serializer
      );
      if (result) {
        results.push({
          sequence: entry.sequence,
          timestamp: entry.timestamp,
          data: result.data,
        });
      }
    }
    return results;
  }
  getAllData(): DataEntry<T>[] {
    if (!this.buffer) throw new Error('Data file not opened');
    const entries = this.indexReader.getAllEntries();
    const results: DataEntry<T>[] = [];
    for (const entry of entries) {
      const result = DataProtocol.deserializeRecord(
        this.buffer,
        Number(entry.offset),
        this.serializer
      );
      if (result) {
        results.push({
          sequence: entry.sequence,
          timestamp: entry.timestamp,
          data: result.data,
        });
      }
    }
    return results;
  }
  getRecordCount(): number {
    return this.indexReader.getHeader().validCount;
  }
  getLastSequence(): number {
    return this.indexReader.getHeader().lastSequence;
  }
  close(): void {
    if (this.buffer) {
      mmap.unmap(this.buffer);
      this.buffer = null;
    }
    if (this.fd !== null) {
      fs.closeSync(this.fd);
      this.fd = null;
    }
    this.header = null;
    this.indexReader.close();
  }
}
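
A usage sketch, assuming an ./events pair written earlier by DataWriter; timestamps are nanoseconds since the epoch, matching the writer's default:

// Hypothetical query: everything from the last five minutes.
const reader = new DataReader('./events', jsonSerializer<{ msg: string }>());
reader.open();
const nowNs = BigInt(Date.now()) * 1000000n;
const recent = reader.getBulkDataByTime(nowNs - 300n * 1_000_000_000n, nowNs);
reader.close();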

lib/dat/serializers.ts (new file)
@@ -0,0 +1,32 @@
// lib/dat/serializers.ts
import { createRequire } from 'node:module';
import type { Serializer } from './types.js';

export function jsonSerializer<T>(): Serializer<T> {
  return {
    serialize(data: T): Buffer {
      return Buffer.from(JSON.stringify(data), 'utf8');
    },
    deserialize(buf: Buffer): T {
      return JSON.parse(buf.toString('utf8'));
    },
  };
}

export function msgpackSerializer<T>(): Serializer<T> {
  // Lazy-load the optional dependency. This package is an ES module, so a
  // bare `require` is not in scope; create one via node:module.
  const require = createRequire(import.meta.url);
  const { encode, decode } = require('@msgpack/msgpack');
  return {
    serialize(data: T): Buffer {
      return Buffer.from(encode(data));
    },
    deserialize(buf: Buffer): T {
      return decode(buf) as T;
    },
  };
}

export function createSerializer<T>(
  serialize: (data: T) => Buffer,
  deserialize: (buf: Buffer) => T
): Serializer<T> {
  return { serialize, deserialize };
}
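
As a sketch, createSerializer pairs naturally with the binary helpers for fixed-shape records (Point is a hypothetical type; assumes BinaryWriter/BinaryReader are imported from ./binary-helpers.js):

// Hypothetical fixed-shape codec built on BinaryWriter/BinaryReader.
interface Point { x: number; y: number; label: string; }
const pointSerializer = createSerializer<Point>(
  (p) => new BinaryWriter().writeDouble(p.x).writeDouble(p.y).writeString(p.label).toBuffer(),
  (buf) => {
    const r = new BinaryReader(buf);
    return { x: r.readDouble(), y: r.readDouble(), label: r.readString() };
  }
);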

lib/dat/types.ts (new file)
@@ -0,0 +1,16 @@
// lib/dat/types.ts
export interface Serializer<T> {
  serialize(data: T): Buffer;
  deserialize(buf: Buffer): T;
}

export interface DataEntry<T> {
  sequence: number;
  timestamp: bigint;
  data: T;
}

export interface DataFileOptions<T> {
  serializer: Serializer<T>;
  maxEntries?: number;
}

lib/dat/writer.ts (new file)
@@ -0,0 +1,120 @@
// lib/dat/writer.ts
import * as fs from 'node:fs';
import { DATA_HEADER_SIZE } from './constants.js';
import { DataProtocol } from './protocol.js';
import { IndexWriter } from '../idx/index.js';
import type { Serializer, DataFileOptions } from './types.js';

export class DataWriter<T> {
  private fd: number | null = null;
  private headerBuf: Buffer | null = null;
  private currentOffset: bigint = BigInt(DATA_HEADER_SIZE);
  private recordCount = 0;
  private indexWriter: IndexWriter;
  private serializer: Serializer<T>;
  readonly dataPath: string;
  readonly indexPath: string;
  constructor(basePath: string, options: DataFileOptions<T>) {
    this.dataPath = `${basePath}.dat`;
    this.indexPath = `${basePath}.idx`;
    this.serializer = options.serializer;
    const maxEntries = options.maxEntries ?? 10_000_000;
    this.indexWriter = new IndexWriter(this.indexPath, { maxEntries });
  }
  open(): void {
    const isNew = !fs.existsSync(this.dataPath);
    this.fd = fs.openSync(this.dataPath, isNew ? 'w+' : 'r+');
    this.headerBuf = Buffer.alloc(DATA_HEADER_SIZE);
    if (isNew) {
      // Fresh file: write an initial header and start appending right after it.
      const header = DataProtocol.createHeader();
      fs.writeSync(this.fd, header, 0, DATA_HEADER_SIZE, 0);
      this.currentOffset = BigInt(DATA_HEADER_SIZE);
      this.recordCount = 0;
    } else {
      // Existing file: resume from the offset recorded in the header.
      fs.readSync(this.fd, this.headerBuf, 0, DATA_HEADER_SIZE, 0);
      const header = DataProtocol.readHeader(this.headerBuf);
      this.currentOffset = header.fileSize;
      this.recordCount = header.recordCount;
    }
    this.indexWriter.open();
  }
  append(data: T, timestamp?: bigint): number {
    if (this.fd === null) throw new Error('Data file not opened');
    const buf = DataProtocol.serializeRecord(data, this.serializer);
    const offset = this.currentOffset;
    fs.writeSync(this.fd, buf, 0, buf.length, Number(offset));
    const sequence = this.indexWriter.getNextSequence();
    // Timestamps are nanoseconds since the Unix epoch.
    const ts = timestamp ?? BigInt(Date.now()) * 1000000n;
    this.indexWriter.append(offset, buf.length, ts);
    this.currentOffset += BigInt(buf.length);
    this.recordCount++;
    return sequence;
  }
  appendBulk(records: T[], timestamp?: bigint): number[] {
    const sequences: number[] = [];
    // All records in one bulk call share a single timestamp.
    const ts = timestamp ?? BigInt(Date.now()) * 1000000n;
    for (const record of records) {
      const seq = this.append(record, ts);
      sequences.push(seq);
    }
    return sequences;
  }
  getLastSequence(): number {
    return this.indexWriter.getLastSequence();
  }
  getNextSequence(): number {
    return this.indexWriter.getNextSequence();
  }
  sync(): void {
    if (this.fd === null || !this.headerBuf) return;
    // Persist the current file size and record count, then fsync both files.
    DataProtocol.updateHeader(this.headerBuf, this.currentOffset, this.recordCount);
    fs.writeSync(this.fd, this.headerBuf, 0, DATA_HEADER_SIZE, 0);
    fs.fsyncSync(this.fd);
    this.indexWriter.syncAll();
  }
  close(): void {
    this.sync();
    if (this.fd !== null) {
      fs.closeSync(this.fd);
      this.fd = null;
    }
    this.indexWriter.close();
    this.headerBuf = null;
  }
  getStats() {
    return {
      dataPath: this.dataPath,
      indexPath: this.indexPath,
      currentOffset: this.currentOffset,
      recordCount: this.recordCount,
      lastSequence: this.indexWriter.getLastSequence(),
    };
  }
}
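
An end-to-end sketch tying writer and reader together (the ./events path is hypothetical; jsonSerializer comes from serializers.ts):

// Write three records, flush via close(), then read one back by sequence.
const writer = new DataWriter('./events', { serializer: jsonSerializer<{ msg: string }>() });
writer.open();
const seqs = writer.appendBulk([{ msg: 'a' }, { msg: 'b' }, { msg: 'c' }]);
writer.close(); // close() syncs the data header and the index

const reader = new DataReader('./events', jsonSerializer<{ msg: string }>());
reader.open();
const first = reader.getBySequence(seqs[0]); // => { sequence, timestamp, data: { msg: 'a' } }
reader.close();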