1extern crate proc_macro;
36
37use proc_macro::TokenStream;
38use proc_macro_crate::{FoundCrate, crate_name};
39use quote::quote;
40use syn::{Fields, ItemStruct, Type, parse_macro_input};
41
42#[proc_macro_attribute]
60pub fn scraped_item(_attr: TokenStream, item: TokenStream) -> TokenStream {
61 let ast = parse_macro_input!(item as ItemStruct);
62 let name = &ast.ident;
63 let item_name = name.to_string();
64 let scraped_item_trait = item_type_tokens("ScrapedItem");
65 let item_field_schema = item_type_tokens("ItemFieldSchema");
66 let item_schema = item_type_tokens("ItemSchema");
67 let typed_item_schema = item_type_tokens("TypedItemSchema");
68 let fields = match &ast.fields {
69 Fields::Named(fields) => fields.named.iter().collect::<Vec<_>>(),
70 _ => {
71 return syn::Error::new_spanned(
72 &ast,
73 "#[scraped_item] only supports structs with named fields",
74 )
75 .to_compile_error()
76 .into();
77 }
78 };
79
80 let schema_fields = fields.iter().map(|field| {
81 let field_ident = field.ident.as_ref().expect("named field");
82 let field_name = field_ident.to_string();
83 let rust_type = quote!(#field.ty).to_string().replace(' ', "");
84 let nullable = is_option_type(&field.ty);
85 let value_type_tokens = field_value_type_tokens(&field.ty);
86
87 quote! {
88 #item_field_schema {
89 name: #field_name.to_string(),
90 rust_type: #rust_type.to_string(),
91 value_type: #value_type_tokens,
92 nullable: #nullable,
93 }
94 }
95 });
96
97 let expanded = quote! {
98 #[derive(
99 ::serde::Serialize,
100 ::serde::Deserialize,
101 Clone,
102 Debug
103 )]
104 #ast
105
106 impl #scraped_item_trait for #name {
107 fn as_any(&self) -> &dyn ::std::any::Any {
108 self
109 }
110
111 fn box_clone(&self) -> Box<dyn #scraped_item_trait + Send + Sync> {
112 Box::new(self.clone())
113 }
114
115 fn to_json_value(&self) -> ::serde_json::Value {
116 match ::serde_json::to_value(self) {
117 Ok(value) => value,
118 Err(err) => panic!("failed to serialize ScrapedItem '{}': {}", stringify!(#name), err),
119 }
120 }
121
122 fn item_schema(&self) -> ::std::option::Option<#item_schema> {
123 ::std::option::Option::Some(<Self as #typed_item_schema>::schema())
124 }
125
126 fn item_schema_version(&self) -> u32 {
127 <Self as #typed_item_schema>::schema_version()
128 }
129 }
130
131 impl #typed_item_schema for #name {
132 fn schema() -> #item_schema {
133 #item_schema {
134 item_name: #item_name.to_string(),
135 version: Self::schema_version(),
136 fields: vec![#(#schema_fields),*],
137 }
138 }
139 }
140 };
141
142 TokenStream::from(expanded)
143}
144
145fn item_type_tokens(type_name: &str) -> proc_macro2::TokenStream {
146 let ident = syn::Ident::new(type_name, proc_macro2::Span::call_site());
147
148 match runtime_crate() {
149 RuntimeCrate::SpiderLib(path) => quote!(#path::#ident),
150 RuntimeCrate::SpiderUtil(path) => quote!(#path::item::#ident),
151 }
152}
153
154fn runtime_crate() -> RuntimeCrate {
155 if let Some(path) = facade_crate_tokens("spider-lib", true) {
156 return RuntimeCrate::SpiderLib(path);
157 }
158
159 if let Some(path) = facade_crate_tokens("spider-util", false) {
160 return RuntimeCrate::SpiderUtil(path);
161 }
162
163 RuntimeCrate::SpiderUtil(
164 syn::Error::new(
165 proc_macro2::Span::call_site(),
166 "#[scraped_item] requires either `spider-lib` or `spider-util` as a dependency",
167 )
168 .to_compile_error(),
169 )
170}
171
172fn facade_crate_tokens(crate_key: &str, use_prelude: bool) -> Option<proc_macro2::TokenStream> {
173 let found = crate_name(crate_key).ok()?;
174
175 Some(match found {
176 FoundCrate::Itself => {
177 let crate_name = crate_key.replace('-', "_");
178 let ident = syn::Ident::new(&crate_name, proc_macro2::Span::call_site());
179 if use_prelude {
180 quote!(::#ident::prelude)
181 } else {
182 quote!(::#ident)
183 }
184 }
185 FoundCrate::Name(name) => {
186 let ident = syn::Ident::new(&name, proc_macro2::Span::call_site());
187 if use_prelude {
188 quote!(::#ident::prelude)
189 } else {
190 quote!(::#ident)
191 }
192 }
193 })
194}
195
196enum RuntimeCrate {
197 SpiderLib(proc_macro2::TokenStream),
198 SpiderUtil(proc_macro2::TokenStream),
199}
200
201fn is_option_type(ty: &Type) -> bool {
202 match ty {
203 Type::Path(type_path) => type_path
204 .path
205 .segments
206 .last()
207 .map(|segment| segment.ident == "Option")
208 .unwrap_or(false),
209 _ => false,
210 }
211}
212
213fn field_value_type_tokens(ty: &Type) -> proc_macro2::TokenStream {
214 let field_value_type = item_type_tokens("FieldValueType");
215 let core_ty = unwrap_option_type(ty).unwrap_or(ty);
216
217 match core_ty {
218 Type::Path(type_path) => {
219 let segment = match type_path.path.segments.last() {
220 Some(segment) => segment,
221 None => {
222 return quote!(#field_value_type::Unknown);
223 }
224 };
225 let ident = segment.ident.to_string();
226 match ident.as_str() {
227 "bool" => quote!(#field_value_type::Bool),
228 "String" | "str" => quote!(#field_value_type::String),
229 "i8" | "i16" | "i32" | "i64" | "i128" | "isize" | "u8" | "u16" | "u32" | "u64"
230 | "u128" | "usize" => quote!(#field_value_type::Integer),
231 "f32" | "f64" => quote!(#field_value_type::Float),
232 "Vec" | "VecDeque" | "HashSet" | "BTreeSet" => {
233 quote!(#field_value_type::Sequence)
234 }
235 "HashMap" | "BTreeMap" => quote!(#field_value_type::Map),
236 "Value" => quote!(#field_value_type::Json),
237 _ => quote!(#field_value_type::Unknown),
238 }
239 }
240 Type::Array(_) | Type::Slice(_) => quote!(#field_value_type::Sequence),
241 Type::Tuple(_) => quote!(#field_value_type::Sequence),
242 _ => quote!(#field_value_type::Unknown),
243 }
244}
245
246fn unwrap_option_type(ty: &Type) -> Option<&Type> {
247 let Type::Path(type_path) = ty else {
248 return None;
249 };
250 let segment = type_path.path.segments.last()?;
251 if segment.ident != "Option" {
252 return None;
253 }
254
255 let syn::PathArguments::AngleBracketed(args) = &segment.arguments else {
256 return None;
257 };
258 let Some(syn::GenericArgument::Type(inner)) = args.args.first() else {
259 return None;
260 };
261 Some(inner)
262}