spider_lib/
prelude.rs

1//! Convenient re-exports for `spider-lib` applications.
2//!
3//! Most example code in this workspace starts here:
4//!
5//! ```rust
6//! use spider_lib::prelude::*;
7//! ```
8//!
9//! The prelude intentionally groups together the "first spider" surface area:
10//! runtime types, the spider trait, common errors, parsing helpers, middleware,
11//! and the most common pipelines.
12
13/// Logging level enum, re-exported from the `log` crate, used by [`CrawlerBuilder::log_level`](spider_core::CrawlerBuilder::log_level).
14pub use log::LevelFilter;
15
16/// Core runtime types and traits used to define and run a crawl.
17pub use spider_core::{
    // NOTE(review): the group labels below look approximate. `CrawlShapePreset` is listed
    // before the "Core structs" label, presumably because the list was sorted after the
    // labels were written. Confirm each item's kind against `spider_core` before relying
    // on a label.
18    CrawlShapePreset,
19    // Core structs
20    Crawler,
21    CrawlerBuilder,
22    CrawlerConfig,
23    DiscoveryConfig,
24    DiscoveryMode,
25    DiscoveryRule,
26    // Core traits
27    Downloader,
28    ParseContext,
29    ReqwestClientDownloader,
30    Spider,
31    StartRequestIter,
32    StartRequests,
33    // Essential re-exports for trait implementation
34    async_trait,
35    // Core modules: items re-exported from the `scheduler` and `state` submodules
36    scheduler::Scheduler,
37    state::CrawlerState,
38    // Thread-safe state primitives (all from the `state` submodule)
39    state::{
40        ConcurrentMap, ConcurrentVec, Counter, Counter64, Flag, StateAccessMetrics, VisitedUrls,
41    },
    // Item from the `stats` submodule; not part of the state primitives group above.
42    stats::StatCollector,
    // `tokio`, as re-exported by `spider_core`.
43    tokio,
44};
45
/// Schema-related item types re-exported from `spider_util::item`.
46pub use spider_util::item::{FieldValueType, ItemFieldSchema, ItemSchema, TypedItemSchema};
47/// Parse-time output sink and item contracts used by [`Spider::parse`].
48pub use spider_util::item::{ParseOutput, ScrapedItem};
49
50/// The [`Pipeline`] trait for item-processing stages.
51pub use spider_pipeline::pipeline::Pipeline;
52
53/// Helper macro, re-exported from `spider_macro`, used to define item structs that satisfy [`ScrapedItem`].
54pub use spider_macro::scraped_item;
55/// The [`Middleware`] trait and the [`MiddlewareAction`] control-flow type for request/response hooks.
56pub use spider_middleware::middleware::{Middleware, MiddlewareAction};
57/// Shared runtime data types and convenience helpers.
///
/// Items are drawn from the `error`, `request`, `response`, `selector`, and `util`
/// submodules of `spider_util`.
58pub use spider_util::{
59    error::{PipelineError, SpiderError},
60    request::{Method, Request},
61    response::{Link, LinkExtractOptions, LinkSource, LinkType, PageMetadata, Response},
62    selector::{SelectorList, SelectorNode},
63    util::{create_dir, is_same_site, normalize_origin, validate_output_dir},
64};
65
66/// Built-in middleware that is available without extra feature flags.
///
/// Re-exports [`RateLimitMiddleware`], [`RefererMiddleware`], and [`RetryMiddleware`].
67pub use spider_middleware::{
68    rate_limit::RateLimitMiddleware, referer::RefererMiddleware, retry::RetryMiddleware,
69};
70
71/// File-backed HTTP response cache middleware.
///
/// Re-exported only when the `middleware-cache` feature is enabled.
72#[cfg(feature = "middleware-cache")]
73pub use spider_middleware::http_cache::HttpCacheMiddleware;
74
75/// Adaptive throttling middleware driven by observed response behavior.
///
/// Re-exported only when the `middleware-autothrottle` feature is enabled.
76#[cfg(feature = "middleware-autothrottle")]
77pub use spider_middleware::autothrottle::AutoThrottleMiddleware;
78
79/// Proxy routing middleware.
///
/// Re-exported only when the `middleware-proxy` feature is enabled.
80#[cfg(feature = "middleware-proxy")]
81pub use spider_middleware::proxy::ProxyMiddleware;
82
83/// Configurable user-agent selection and rotation middleware.
///
/// Re-exported only when the `middleware-user-agent` feature is enabled.
84#[cfg(feature = "middleware-user-agent")]
85pub use spider_middleware::user_agent::UserAgentMiddleware;
86
87/// `robots.txt` enforcement middleware.
///
/// Re-exported only when the `middleware-robots` feature is enabled.
88#[cfg(feature = "middleware-robots")]
89pub use spider_middleware::robots::RobotsTxtMiddleware;
90
91/// Shared cookie jar middleware.
///
/// Re-exported only when the `middleware-cookies` feature is enabled.
92#[cfg(feature = "middleware-cookies")]
93pub use spider_middleware::cookies::CookieMiddleware;
94
95/// Built-in pipelines that do not require extra feature flags.
///
/// Items come from the `console`, `dedup`, `schema`, `transform`, and `validation`
/// submodules of `spider_pipeline`, together with the companion types re-exported
/// alongside each pipeline.
96pub use spider_pipeline::{
97    console::ConsolePipeline,
98    dedup::DeduplicationPipeline,
99    schema::{
100        SchemaExportConfig, SchemaTransformPipeline, SchemaValidationPipeline, SchemaViolation,
101    },
102    transform::{TransformOperation, TransformPipeline},
103    validation::{JsonType, ValidationPipeline, ValidationRule},
104};
105
106/// CSV file output pipeline.
///
/// Re-exported only when the `pipeline-csv` feature is enabled.
107#[cfg(feature = "pipeline-csv")]
108pub use spider_pipeline::csv::CsvPipeline;
109
110/// JSON array output pipeline.
///
/// Re-exported only when the `pipeline-json` feature is enabled.
111#[cfg(feature = "pipeline-json")]
112pub use spider_pipeline::json::JsonPipeline;
113
114/// JSON Lines output pipeline.
///
/// Re-exported only when the `pipeline-jsonl` feature is enabled.
115#[cfg(feature = "pipeline-jsonl")]
116pub use spider_pipeline::jsonl::JsonlPipeline;
117
118/// SQLite output pipeline.
///
/// Re-exported only when the `pipeline-sqlite` feature is enabled.
119#[cfg(feature = "pipeline-sqlite")]
120pub use spider_pipeline::sqlite::SqlitePipeline;
121
122/// Streaming JSON output pipeline.
///
/// Re-exported only when the `pipeline-stream-json` feature is enabled.
123#[cfg(feature = "pipeline-stream-json")]
124pub use spider_pipeline::stream_json::StreamJsonPipeline;
125
126/// Checkpoint types for save/resume workflows.
///
/// Re-exported from `spider_core::checkpoint` only when the `checkpoint` feature is enabled.
127#[cfg(feature = "checkpoint")]
128pub use spider_core::checkpoint::{Checkpoint, SchedulerCheckpoint};