From bd90109c70fd3d4b302ed38e7b768e6df7d323d9 Mon Sep 17 00:00:00 2001 From: tillcash Date: Thu, 30 May 2024 00:46:10 +0530 Subject: [PATCH] [HarvardHealthBlogBridge] New (#4116) --- bridges/HarvardHealthBlogBridge.php | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 bridges/HarvardHealthBlogBridge.php diff --git a/bridges/HarvardHealthBlogBridge.php b/bridges/HarvardHealthBlogBridge.php new file mode 100644 index 00000000..75e7e2cf --- /dev/null +++ b/bridges/HarvardHealthBlogBridge.php @@ -0,0 +1,56 @@ +find('div[class="mb-16 md:flex"]') as $element) { + if ($count >= self::MAX_ARTICLES) { + break; + } + + $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0); + if (!$data) { + continue; + } + + $url = $data->href; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $element->find('time', 0)->datetime, + 'title' => $data->plaintext, + 'uid' => $url, + 'uri' => $url, + ]; + + $count++; + } + } + + /** Build the content for one article: load $url through getSimpleHTMLDOMCached(), take the match at index 0 for div[class*="content-repository-content"], blank the outertext of every .inline-ad element found inside it, and return its innertext. Returns the string 'Content Not Found' when no such element matches. NOTE(review): getSimpleHTMLDOMCached() is defined outside this view - presumably it fetches and caches the parsed page; confirm how it behaves on a failed request. */ private function constructContent($url) + { + $dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div[class*="content-repository-content"]', 0); + if (!$article) { + return 'Content Not Found'; + } + + // Remove ads + foreach ($article->find('.inline-ad') as $remove) { + $remove->outertext = ''; + } + + return $article->innertext; + } +}