Struct DiscoveryConfig
pub struct DiscoveryConfig {
pub mode: DiscoveryMode,
pub discover_sitemaps: bool,
pub max_sitemap_depth: usize,
pub extract_page_metadata: bool,
pub link_extract_options: LinkExtractOptions,
pub rules: Vec<DiscoveryRule>,
}
Expand description
Core runtime types and traits used to define and run a crawl. Discovery-specific runtime configuration.
Fields§
§mode: DiscoveryMode
How the runtime should discover follow-up work from responses.
discover_sitemaps: bool
Whether sitemap XML should be parsed into follow-up requests.
max_sitemap_depth: usize
Maximum recursion depth for nested sitemap indexes.
extract_page_metadata: bool
Whether page metadata should be extracted and attached to response metadata.
link_extract_options: LinkExtractOptions
Base link extraction options used for HTML discovery.
rules: Vec<DiscoveryRule>
Optional rule-like link discovery behavior matched against source responses.
Implementations§
§impl DiscoveryConfig
impl DiscoveryConfig
pub fn new() -> DiscoveryConfig
pub fn new() -> DiscoveryConfig
Creates a new discovery config with default values.
pub fn with_mode(self, mode: DiscoveryMode) -> DiscoveryConfig
pub fn with_mode(self, mode: DiscoveryMode) -> DiscoveryConfig
Sets the discovery mode.
pub fn with_sitemaps(self, enabled: bool) -> DiscoveryConfig
pub fn with_sitemaps(self, enabled: bool) -> DiscoveryConfig
Enables or disables sitemap parsing.
pub fn with_max_sitemap_depth(self, depth: usize) -> DiscoveryConfig
pub fn with_max_sitemap_depth(self, depth: usize) -> DiscoveryConfig
Sets the maximum nested sitemap depth.
pub fn with_page_metadata(self, enabled: bool) -> DiscoveryConfig
pub fn with_page_metadata(self, enabled: bool) -> DiscoveryConfig
Enables or disables page metadata extraction.
pub fn with_link_extract_options(
self,
options: LinkExtractOptions,
) -> DiscoveryConfig
pub fn with_link_extract_options( self, options: LinkExtractOptions, ) -> DiscoveryConfig
Replaces the base link extraction options.
pub fn with_rules(
self,
rules: impl IntoIterator<Item = DiscoveryRule>,
) -> DiscoveryConfig
pub fn with_rules( self, rules: impl IntoIterator<Item = DiscoveryRule>, ) -> DiscoveryConfig
Replaces the configured discovery rules.
pub fn with_rule(self, rule: DiscoveryRule) -> DiscoveryConfig
pub fn with_rule(self, rule: DiscoveryRule) -> DiscoveryConfig
Adds a single discovery rule.
pub fn with_same_site_only(self, enabled: bool) -> DiscoveryConfig
pub fn with_same_site_only(self, enabled: bool) -> DiscoveryConfig
Sets whether only same-site links should be discovered.
pub fn with_text_links(self, enabled: bool) -> DiscoveryConfig
pub fn with_text_links(self, enabled: bool) -> DiscoveryConfig
Sets whether text content should be scanned for plain-text URLs.
pub fn with_allow_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_allow_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Restricts discovery to URLs that match at least one glob-style pattern.
pub fn with_deny_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_deny_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Excludes URLs that match any glob-style pattern.
pub fn with_allow_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_allow_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Restricts discovery to the given domains or subdomains.
pub fn with_deny_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_deny_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Excludes discovery for the given domains or subdomains.
pub fn with_allow_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_allow_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Restricts discovery to URL paths with one of the provided prefixes.
pub fn with_deny_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_deny_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Excludes URL paths with one of the provided prefixes.
Restricts attribute extraction to specific HTML tags.
pub fn with_allowed_attributes(
self,
attributes: impl IntoIterator<Item = impl Into<String>>,
) -> DiscoveryConfig
pub fn with_allowed_attributes( self, attributes: impl IntoIterator<Item = impl Into<String>>, ) -> DiscoveryConfig
Restricts attribute extraction to specific attributes.
pub fn with_allowed_link_types(
self,
link_types: impl IntoIterator<Item = LinkType>,
) -> DiscoveryConfig
pub fn with_allowed_link_types( self, link_types: impl IntoIterator<Item = LinkType>, ) -> DiscoveryConfig
Restricts discovery to the provided link types.
pub fn with_denied_link_types(
self,
link_types: impl IntoIterator<Item = LinkType>,
) -> DiscoveryConfig
pub fn with_denied_link_types( self, link_types: impl IntoIterator<Item = LinkType>, ) -> DiscoveryConfig
Excludes the provided link types from discovery.
pub fn effective_link_extract_options(&self) -> Option<LinkExtractOptions>
pub fn effective_link_extract_options(&self) -> Option<LinkExtractOptions>
Returns the effective link extraction options for the configured mode.
pub fn effective_link_extract_options_for(
&self,
options: LinkExtractOptions,
) -> Option<LinkExtractOptions>
pub fn effective_link_extract_options_for( &self, options: LinkExtractOptions, ) -> Option<LinkExtractOptions>
Returns the effective link extraction options for a specific rule or override.
pub fn should_extract_metadata(&self) -> bool
pub fn should_extract_metadata(&self) -> bool
Returns true when metadata extraction should run.
Trait Implementations§
§impl Clone for DiscoveryConfig
impl Clone for DiscoveryConfig
§fn clone(&self) -> DiscoveryConfig
fn clone(&self) -> DiscoveryConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
§impl Debug for DiscoveryConfig
impl Debug for DiscoveryConfig
§impl Default for DiscoveryConfig
impl Default for DiscoveryConfig
§fn default() -> DiscoveryConfig
fn default() -> DiscoveryConfig
§impl PartialEq for DiscoveryConfig
impl PartialEq for DiscoveryConfig
impl Eq for DiscoveryConfig
impl StructuralPartialEq for DiscoveryConfig
Auto Trait Implementations§
impl Freeze for DiscoveryConfig
impl RefUnwindSafe for DiscoveryConfig
impl Send for DiscoveryConfig
impl Sync for DiscoveryConfig
impl Unpin for DiscoveryConfig
impl UnwindSafe for DiscoveryConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.