MS MARCO is a large-scale dataset for benchmarking information retrieval systems. It contains real queries from the Bing search engine together with human-annotated relevant passages, and is designed to evaluate ranking and retrieval models.
import { Bench } from 'benchthing';
// Name the benchmark and the task once so the run request and the result
// lookup below cannot drift apart.
const benchmarkName = 'ms-marco';
const taskId = '1';

// Instantiate Bench for the benchmark named above.
const bench = new Bench(benchmarkName);

// Options handed to bench.run(): which benchmark, under which task id, and
// the caller-supplied models (`yourEmbeddingModels` is a placeholder that must
// be defined by the surrounding code).
const runOptions = {
  benchmark: benchmarkName,
  taskId,
  models: yourEmbeddingModels,
};

// Await the run before asking for its result.
await bench.run(runOptions);

// Look the result up under the same task id that was passed to run().
const result = await bench.getResult(taskId);