// spider_util/constants/scheduler.rs

//! Defaults used by the scheduler and duplicate-detection path.
//!
//! Groups the tunables for the visited-URL cache, pending-request
//! backpressure, and the Bloom filter together with its write buffer.

/// Capacity of the visited URL cache in the scheduler.
///
/// See also [`DEFAULT_VISITED_CACHE_SIZE`], the smaller capacity used when
/// not using checkpoint.
pub const VISITED_URL_CACHE_CAPACITY: u64 = 500_000;

/// Default capacity for the visited URL cache when not using checkpoint.
pub const DEFAULT_VISITED_CACHE_SIZE: u64 = 200_000;

/// Maximum number of pending requests before applying backpressure.
pub const MAX_PENDING_REQUESTS: usize = 30_000;

/// Time-to-idle for visited URL cache entries, in seconds (1 hour).
///
/// NOTE(review): the name says "TTL" while the description says
/// time-to-idle; confirm which expiry policy the cache applies.
pub const VISITED_URL_CACHE_TTL_SECS: u64 = 60 * 60;

/// Capacity of the Bloom filter for duplicate detection.
pub const BLOOM_FILTER_CAPACITY: u64 = 5_000_000;

/// Number of hash functions used by the Bloom filter.
pub const BLOOM_FILTER_HASH_FUNCTIONS: usize = 5;

/// Buffer size reached before buffered entries are flushed to the Bloom
/// filter.
pub const BLOOM_BUFFER_FLUSH_SIZE: usize = 100;

/// Interval, in milliseconds, for the periodic Bloom filter flush.
pub const BLOOM_FLUSH_INTERVAL_MS: u64 = 100;