pub struct LinkExtractOptions {
    pub same_site_only: bool,
    pub include_text_links: bool,
    pub sources: Vec<LinkSource>,
    pub allowed_link_types: Option<Vec<LinkType>>,
    pub denied_link_types: Vec<LinkType>,
    pub allow_patterns: Vec<String>,
    pub deny_patterns: Vec<String>,
    pub allow_domains: Vec<String>,
    pub deny_domains: Vec<String>,
    pub allow_path_prefixes: Vec<String>,
    pub deny_path_prefixes: Vec<String>,
    pub allowed_tags: Option<Vec<String>>,
    pub allowed_attributes: Option<Vec<String>>,
}
Options that control link extraction from a Response.
The defaults are intentionally conservative for crawler use: same-site filtering is enabled, text links are included, and common HTML elements are scanned for navigable URLs.
Fields§
same_site_only: boolRestrict discovered links to the same registered domain.
include_text_links: boolInclude URLs found in text content.
sources: Vec<LinkSource>HTML sources used to discover attribute-based links.
allowed_link_types: Option<Vec<LinkType>>Optional allow-list of link types to include.
denied_link_types: Vec<LinkType>Optional deny-list of link types to exclude.
allow_patterns: Vec<String>Optional allow-list of glob-style URL patterns (* and ? supported).
deny_patterns: Vec<String>Optional deny-list of glob-style URL patterns (* and ? supported).
allow_domains: Vec<String>Optional allow-list of domains or registered-domain suffixes.
deny_domains: Vec<String>Optional deny-list of domains or registered-domain suffixes.
allow_path_prefixes: Vec<String>Optional allow-list of URL path prefixes.
deny_path_prefixes: Vec<String>Optional deny-list of URL path prefixes.
allowed_tags: Option<Vec<String>>Optional allow-list of HTML tag names used for attribute extraction.
allowed_attributes: Option<Vec<String>>Optional allow-list of attribute names used for attribute extraction.
Implementations§
Source§impl LinkExtractOptions
impl LinkExtractOptions
Sourcepub fn same_site_only(self, same_site_only: bool) -> Self
pub fn same_site_only(self, same_site_only: bool) -> Self
Sets whether only same-site URLs should be returned.
Sourcepub fn include_text_links(self, include_text_links: bool) -> Self
pub fn include_text_links(self, include_text_links: bool) -> Self
Sets whether URLs found in text content should be returned.
Sourcepub fn with_sources(self, sources: impl IntoIterator<Item = LinkSource>) -> Self
pub fn with_sources(self, sources: impl IntoIterator<Item = LinkSource>) -> Self
Replaces the configured HTML extraction sources.
Sourcepub fn add_source(self, source: LinkSource) -> Self
pub fn add_source(self, source: LinkSource) -> Self
Adds an HTML extraction source.
Sourcepub fn with_allowed_link_types(
self,
allowed_link_types: impl IntoIterator<Item = LinkType>,
) -> Self
pub fn with_allowed_link_types( self, allowed_link_types: impl IntoIterator<Item = LinkType>, ) -> Self
Restricts extraction to the provided link types.
Sourcepub fn with_denied_link_types(
self,
denied_link_types: impl IntoIterator<Item = LinkType>,
) -> Self
pub fn with_denied_link_types( self, denied_link_types: impl IntoIterator<Item = LinkType>, ) -> Self
Adds link types that should be excluded even if discovered.
Sourcepub fn allow_pattern(self, pattern: impl Into<String>) -> Self
pub fn allow_pattern(self, pattern: impl Into<String>) -> Self
Adds a glob-style allow pattern that URLs must match.
Sourcepub fn with_allow_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_allow_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the glob-style allow patterns.
Sourcepub fn deny_pattern(self, pattern: impl Into<String>) -> Self
pub fn deny_pattern(self, pattern: impl Into<String>) -> Self
Adds a glob-style deny pattern that excludes matching URLs.
Sourcepub fn with_deny_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_deny_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the glob-style deny patterns.
Sourcepub fn allow_domain(self, domain: impl Into<String>) -> Self
pub fn allow_domain(self, domain: impl Into<String>) -> Self
Adds a domain or registered-domain suffix to allow.
Sourcepub fn with_allow_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_allow_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the allowed domains.
Sourcepub fn deny_domain(self, domain: impl Into<String>) -> Self
pub fn deny_domain(self, domain: impl Into<String>) -> Self
Adds a domain or registered-domain suffix to deny.
Sourcepub fn with_deny_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_deny_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the denied domains.
Sourcepub fn allow_path_prefix(self, prefix: impl Into<String>) -> Self
pub fn allow_path_prefix(self, prefix: impl Into<String>) -> Self
Adds a URL path prefix that links must match.
Sourcepub fn with_allow_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_allow_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the allowed URL path prefixes.
Sourcepub fn deny_path_prefix(self, prefix: impl Into<String>) -> Self
pub fn deny_path_prefix(self, prefix: impl Into<String>) -> Self
Adds a URL path prefix that should be excluded.
Sourcepub fn with_deny_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_deny_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Replaces the denied URL path prefixes.
pub fn with_allowed_tags( self, tags: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Restricts attribute-based extraction to specific HTML tag names.
Sourcepub fn with_allowed_attributes(
self,
attributes: impl IntoIterator<Item = impl Into<String>>,
) -> Self
pub fn with_allowed_attributes( self, attributes: impl IntoIterator<Item = impl Into<String>>, ) -> Self
Restricts attribute-based extraction to specific attribute names.
Trait Implementations§
Source§impl Clone for LinkExtractOptions
impl Clone for LinkExtractOptions
Source§fn clone(&self) -> LinkExtractOptions
fn clone(&self) -> LinkExtractOptions
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more