#Performance Tuning

Optimize PhoenixmlDb for your specific workload with these tuning guidelines.

#Query Optimization

#Use Indexes

The most important optimization is proper indexing:

// Create indexes for frequently queried paths
container.CreateIndex(new PathIndex("product-idx",
    "/product/name", "/product/category"));
container.CreateIndex(new ValueIndex("price-idx",
    "/product/price", ValueType.Decimal));

#Check Query Plans

var plan = db.Explain(query);
Console.WriteLine(plan);
// Verify indexes are used
if (!plan.IndexesUsed.Any())
{
    Console.WriteLine("Warning: Full scan, consider adding indexes");
}

#Optimize XQuery

(: Good - filter early :)
for $p in collection('products')//product[category = 'Electronics']
where $p/price > 100
return $p
(: Less efficient - filter late :)
for $p in collection('products')//product
where $p/category = 'Electronics' and $p/price > 100
return $p

#Limit Results

(: Always limit when you don't need all results :)
(for $p in collection('products')//product
 order by $p/date descending
 return $p)[position() <= 100]

#Use Parameters

// Good - compiled once, parameters bound
var prepared = db.Prepare("""
    for $p in //product where $p/category = $cat return $p
    """);
var results = prepared.Execute(new QueryParameters { ["cat"] = "Electronics" });
// Bad - recompiled every time
var results = db.Query($"for $p in //product where $p/category = '{category}' return $p");

#Storage Optimization

#Appropriate Map Size

// Too small causes "map full" errors
// Too large wastes address space
var options = new DatabaseOptions
{
    MapSize = dataSize * 2  // 2x expected data size
};

#Sync Mode

// For highest performance (reduced durability)
var options = new DatabaseOptions
{
    NoMetaSync = true  // ~10% faster commits
};
// For bulk imports (recoverable data)
var options = new DatabaseOptions
{
    NoSync = true  // ~50% faster, but data at risk
};

#Batch Writes

// Good - single transaction for multiple writes
using (var txn = db.BeginTransaction())
{
    foreach (var doc in documents)
    {
        container.PutDocument(doc.Name, doc.Content);
    }
    txn.Commit();
}
// Bad - separate transaction for each write
foreach (var doc in documents)
{
    container.PutDocument(doc.Name, doc.Content);
}

#Memory Management

#Read-Only Transactions

// Prefer read-only - no write locks, concurrent access
using (var txn = db.BeginTransaction(readOnly: true))
{
    // Multiple concurrent readers allowed
}

#Dispose Promptly

// Good - dispose immediately
using (var txn = db.BeginTransaction(readOnly: true))
{
    var result = txn.Query(xquery);
    ProcessResults(result.ToList());
}
// Bad - holding transaction open
var txn = db.BeginTransaction(readOnly: true);
var result = txn.Query(xquery);
// ... long processing ...
txn.Dispose();  // Blocks other writers

#Stream Large Documents

// Good - streaming for large documents
using var stream = container.GetDocumentAsStream("large.xml");
ProcessStream(stream);
// Bad - loads entire document into memory
var xml = container.GetDocument("large.xml");

#Index Tuning

#Index Selectivity

High-selectivity indexes are more effective:

// Good - unique or high cardinality
container.CreateIndex(new PathIndex("id-idx", "/@id"));
container.CreateIndex(new ValueIndex("email-idx", "/user/email", ValueType.String));
// Less effective - low cardinality
container.CreateIndex(new PathIndex("status-idx", "/order/status"));  // Only few values

#Composite Indexes

// Single index for common query pattern
container.CreateIndex(new PathIndex("order-lookup",
    "/order/customerId", "/order/date", "/order/status"));

#Index Maintenance

// Rebuild fragmented indexes
var stats = container.GetIndexStats("price-idx");
if (stats.Fragmentation > 0.3)  // > 30% fragmented
{
    container.RebuildIndex("price-idx");
}

#Monitoring

#Database Statistics

var stats = db.GetStatistics();
Console.WriteLine($"Containers: {stats.ContainerCount}");
Console.WriteLine($"Documents: {stats.DocumentCount}");
Console.WriteLine($"Size: {stats.UsedBytes / 1024 / 1024} MB");
Console.WriteLine($"Readers: {stats.ActiveReaders}");

#Query Metrics

var options = new QueryOptions
{
    CollectMetrics = true
};
var results = db.Query(xquery, options);
Console.WriteLine($"Parse time: {results.Metrics.ParseTime}");
Console.WriteLine($"Optimize time: {results.Metrics.OptimizeTime}");
Console.WriteLine($"Execute time: {results.Metrics.ExecuteTime}");
Console.WriteLine($"Rows scanned: {results.Metrics.RowsScanned}");
Console.WriteLine($"Index hits: {results.Metrics.IndexHits}");

#Benchmarking

#Measure Operations

var sw = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
    db.Query(xquery);
}
sw.Stop();
Console.WriteLine($"Avg: {sw.ElapsedMilliseconds / iterations} ms/query");

#Compare Approaches

// Test different index configurations
var configs = new[]
{
    () => container.CreateIndex(new PathIndex("p1", "/a/b")),
    () => container.CreateIndex(new ValueIndex("v1", "/a/b", ValueType.String)),
    () => { /* no index */ }
};
foreach (var config in configs)
{
    ResetDatabase();
    config();
    var time = BenchmarkQuery(query);
    Console.WriteLine($"Time: {time}");
}

#Common Bottlenecks

Symptom	Likely Cause	Solution
Slow queries	Missing index	Add appropriate index
High memory	Large transactions	Use smaller batches
Slow writes	Sync overhead	Use NoMetaSync or batch
Lock contention	Long transactions	Keep transactions short
"Map full"	Insufficient size	Increase MapSize

#Platform Tuning

#Linux

# Increase max open files
ulimit -n 65536
# Disable transparent huge pages (can hurt LMDB)
echo never > /sys/kernel/mm/transparent_hugepage/enabled

#Windows

# Increase working set
# Use SSD storage
# Disable antivirus scanning on data directory

#Best Practices Summary

Index first — Create indexes for query patterns
Batch writes — Group writes in transactions
Use read-only — When only reading
Check plans — Verify index usage
Monitor metrics — Track performance over time
Keep transactions short — Minimize lock duration
Stream large data — Avoid loading into memory
SSD storage — For production workloads