6. Write a Java program to implement a concurrent web crawler that crawls multiple websites simultaneously using threads.
💡Code:
// NOTE(review): the original listing placed these imports *between* the two
// class declarations, which is a compile error — imports must precede all
// type declarations in a Java compilation unit.
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;

/**
 * Worker thread that sums all prime numbers in the inclusive range
 * [start, end]. (Leftover from a previous exercise in this listing; kept
 * with its original interface.)
 */
class MyThread extends Thread {
    private final int start;
    private final int end;
    private int sum = 0;

    MyThread(int start, int end) {
        this.start = start;
        this.end = end;
    }

    @Override
    public void run() {
        System.out.println("My thread is running");
        for (int i = start; i <= end; i++) {
            if (isPrime(i)) {
                sum += i;
            }
        }
    }

    /**
     * Trial division up to sqrt(n).
     * Fixed: the original returned true for negative n (sqrt of a negative
     * is NaN, so the loop body never executed); anything below 2 is not prime.
     */
    boolean isPrime(int n) {
        if (n < 2) {
            return false;
        }
        for (int i = 2; i <= Math.sqrt(n); ++i) {
            if (n % i == 0) {
                return false;
            }
        }
        return true;
    }

    /** Sum of the primes found by run(); only meaningful after the thread has been joined. */
    public int getSum() {
        return sum;
    }
}

/**
 * Concurrent web crawler: crawls pages up to MAX_DEPTH using a fixed thread
 * pool. Fixes over the original listing:
 *
 *  - {@code visitedUrls} is now a concurrent set — the original plain
 *    HashSet was mutated from multiple pool threads without any
 *    synchronization (a data race);
 *  - the contains()/add() check-then-act race is closed by relying on the
 *    atomic return value of {@link Set#add};
 *  - raw {@code Set} types are replaced with {@code Set<String>} (the raw
 *    for-each over Object did not even compile);
 *  - task submissions after shutdown() no longer crash a worker thread with
 *    an uncaught RejectedExecutionException;
 *  - InterruptedException handlers restore the thread's interrupt status.
 */
class Main {
    private static final int MAX_DEPTH = 2;        // maximum crawl depth
    private static final int THREAD_POOL_SIZE = 5; // number of worker threads

    // Thread-safe: mutated concurrently by every WebPageCrawler task.
    private final Set<String> visitedUrls = ConcurrentHashMap.newKeySet();
    private final ExecutorService executorService =
            Executors.newFixedThreadPool(THREAD_POOL_SIZE);

    /**
     * Schedules {@code startUrl} for crawling unless it is beyond MAX_DEPTH
     * or has already been visited. Safe to call from any thread.
     *
     * @param startUrl     URL to crawl
     * @param currentDepth depth of this URL in the crawl tree (0 = root)
     */
    public void startCrawling(String startUrl, int currentDepth) {
        // add() is atomic and returns false when the URL was already present,
        // so no separate contains() check (and no race) is needed.
        if (currentDepth > MAX_DEPTH || !visitedUrls.add(startUrl)) {
            return;
        }
        try {
            executorService.execute(new WebPageCrawler(startUrl, currentDepth));
        } catch (RejectedExecutionException ignored) {
            // The pool has been shut down (main() stops after a fixed delay);
            // dropping late submissions is the intended best-effort behavior.
        }
    }

    /** Stops accepting new crawl tasks; already-queued tasks still complete. */
    public void shutdown() {
        executorService.shutdown();
    }

    /** One crawl task: "fetch" a page, then schedule its outgoing links. */
    private class WebPageCrawler implements Runnable {
        private final String url;
        private final int depth;

        public WebPageCrawler(String url, int depth) {
            this.url = url;
            this.depth = depth;
        }

        @Override
        public void run() {
            System.out.println("Crawling " + url + " at depth " + depth);
            // Placeholder for real page fetching (e.g. Jsoup or HttpClient);
            // the delay simulates network latency.
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // preserve interrupt status
                return;
            }
            // Fetch links from the page and schedule each one level deeper.
            for (String link : getLinksFromPage(url)) {
                startCrawling(link, depth + 1);
            }
        }

        /**
         * Returns the outgoing links of {@code url}. For demonstration this
         * is a fixed dummy set; a real crawler would parse the fetched page.
         */
        private Set<String> getLinksFromPage(String url) {
            Set<String> dummyLinks = new HashSet<>();
            dummyLinks.add("https://google.com/link1");
            dummyLinks.add("https://google.com/link2");
            return dummyLinks;
        }
    }

    public static void main(String[] args) {
        Main webCrawler = new Main();
        webCrawler.startCrawling("https://google.com", 0);
        // Crude demo synchronization: give the crawl five seconds, then stop
        // accepting new work. Already-running tasks drain naturally.
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        webCrawler.shutdown();
    }
}
📸 Output:
