Here is a Rust program that crawls https://isab.run/ and finds all its subdomains using crt.sh or other certificate transparency registries:
use std::collections::{HashSet, VecDeque};
use std::error::Error;

use reqwest::blocking::Client;
use select::document::Document;
use select::predicate::Name;

fn main() -> Result<(), Box<dyn Error>> {
    let base_url = "https://isab.run/";
    let client = Client::new();

    // Breadth-first crawl: take URLs from the front of the queue and push
    // newly discovered links onto the back.
    let mut visited_pages: HashSet<String> = HashSet::new();
    let mut queue: VecDeque<String> = VecDeque::new();
    queue.push_back(base_url.to_string());

    while let Some(url) = queue.pop_front() {
        // insert() returns false if the URL was already visited.
        if !visited_pages.insert(url.clone()) {
            continue;
        }
        let res = match client.get(&url).send() {
            Ok(res) => res,
            Err(e) => {
                println!("Error getting page: {}", e);
                continue;
            }
        };
        if !res.status().is_success() {
            continue;
        }
        // Parse the HTML and enqueue every link that stays on the site.
        let document = Document::from_read(res)?;
        for node in document.find(Name("a")) {
            let link = node.attr("href").unwrap_or("");
            if link.starts_with('/') {
                queue.push_back(format!("{}{}", base_url.trim_end_matches('/'), link));
            } else if link.starts_with(base_url) {
                queue.push_back(link.to_string());
            }
        }
    }

    // Use a certificate transparency registry such as crt.sh to find all
    // subdomains of isab.run. The response is a JSON array of certificate
    // records whose "name_value" field holds the matched names.
    let crt_sh_search_url = format!("https://crt.sh/?q=%.{}&output=json", "isab.run");
    let subdomains_json = reqwest::blocking::get(&crt_sh_search_url)?.text()?;
    let records: Vec<serde_json::Value> = serde_json::from_str(&subdomains_json)?;
    let subdomains: HashSet<&str> = records
        .iter()
        .filter_map(|record| record["name_value"].as_str())
        .flat_map(|names| names.lines()) // one cert can list several names
        .collect();
    println!("Subdomains of isab.run: {:?}", subdomains);
    Ok(())
}
In this code, we first create a blocking Client from the reqwest library to make HTTP requests. We then run a breadth-first traversal of the isab.run website: a VecDeque holds the frontier of URLs to fetch, and a HashSet records every visited URL so the same page is never fetched twice.
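Note that the starts_with checks in the loop are a rough heuristic: relative links such as ../about or page.html are silently dropped. If those matter, the url crate (an extra dependency not used above) can resolve any href against the page it appeared on. A minimal sketch, with resolve_link as a hypothetical helper:

use url::Url;

// Hypothetical helper: resolve an href against the page that linked to it,
// keeping only URLs on the same host. Assumes the url crate is in Cargo.toml.
fn resolve_link(page: &Url, href: &str) -> Option<Url> {
    let absolute = page.join(href).ok()?; // handles relative and absolute hrefs
    if absolute.host_str() == page.host_str() {
        Some(absolute)
    } else {
        None // off-site link: skip it
    }
}

In the crawl loop you would then enqueue the result of resolve_link for each href instead of concatenating strings.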
To find all the subdomains of isab.run, we query the certificate transparency registry crt.sh. The search URL https://crt.sh/?q=%.isab.run&output=json returns, as a JSON array, every logged certificate matching *.isab.run; we fetch it with reqwest, parse the records, and collect their name_value fields (one certificate can list several names, separated by newlines) into a deduplicated HashSet of subdomains.
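If you would rather not index into untyped JSON, the same response can be deserialized into a small struct. A sketch assuming serde with the derive feature is in Cargo.toml (CrtShRecord and parse_subdomains are names invented here):

use std::collections::HashSet;

use serde::Deserialize;

// Only the field we need; crt.sh returns several more per record.
#[derive(Deserialize)]
struct CrtShRecord {
    name_value: String,
}

fn parse_subdomains(json: &str) -> serde_json::Result<HashSet<String>> {
    let records: Vec<CrtShRecord> = serde_json::from_str(json)?;
    // A single certificate may list several names separated by newlines.
    Ok(records
        .iter()
        .flat_map(|r| r.name_value.lines())
        .map(|s| s.to_string())
        .collect())
}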
This is a basic implementation of a web crawler for the isab.run website, and it can be extended or optimized as needed; two easy improvements, checking the Content-Type before parsing and pacing requests, are sketched below.
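For example, a helper along these lines (should_parse is a hypothetical name, and the one-second delay is an arbitrary choice) could be called on each response inside the crawl loop:

use std::thread;
use std::time::Duration;

use reqwest::blocking::Response;

// Returns true if the response claims to be HTML, and sleeps briefly so
// consecutive requests are spaced out. Call once per fetched page.
fn should_parse(res: &Response) -> bool {
    thread::sleep(Duration::from_secs(1)); // crude politeness delay
    res.headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .map_or(false, |v| v.contains("text/html"))
}

Calling if !should_parse(&res) { continue; } right after the status check skips non-HTML responses such as images and PDFs before they reach the HTML parser.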