From f67d2eb88adc597cc57fbfc402c28725b671e5a3 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 28 Dec 2023 13:53:06 +0100 Subject: [PATCH] [TikTokBridge] Use embed iframe to bypass scraping protection (#3864) The TikTok website was completely reworked and now uses some "scraping" protection: requests to what used to be the "API URL" must carry a parameter whose value is generated somewhere in the site's bundle of JavaScript. The embed iframe does not have such protection. It exposes less information (no date, ...), but it's better than nothing! --- bridges/TikTokBridge.php | 66 ++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 73a18b04..6590df66 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -8,12 +8,12 @@ class TikTokBridge extends BridgeAbstract const MAINTAINER = 'VerifiedJoseph'; const PARAMETERS = [ 'By user' => [ - 'username' => [ - 'name' => 'Username', - 'type' => 'text', - 'required' => true, - 'exampleValue' => '@tiktok', - ] + 'username' => [ + 'name' => 'Username', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '@tiktok', + ] ]]; const TEST_DETECT_PARAMETERS = [ @@ -24,53 +24,33 @@ class TikTokBridge extends BridgeAbstract const CACHE_TIMEOUT = 900; // 15 minutes - private $feedName = ''; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); + $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $this->processUsername()); - $title = $html->find('h1', 0)->plaintext ?? self::NAME; - $this->feedName = htmlspecialchars_decode($title); + $author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext ?? self::NAME; - $var = $html->find('script[id=SIGI_STATE]', 0); - if (!$var) { - throw new \Exception('Unable to find tiktok user data for ' . 
$this->processUsername()); - } - $SIGI_STATE_RAW = $var->innertext; - $SIGI_STATE = Json::decode($SIGI_STATE_RAW, false); + $videos = $html->find('div[data-e2e=common-videoList-VideoContainer]'); - if (!isset($SIGI_STATE->ItemModule)) { - return; - } - - foreach ($SIGI_STATE->ItemModule as $key => $value) { + foreach ($videos as $video) { $item = []; - $link = 'https://www.tiktok.com/@' . $value->author . '/video/' . $value->id; - $image = $value->video->dynamicCover; - if (empty($image)) { - $image = $value->video->cover; - } - $views = $value->stats->playCount; - $hastags = []; - foreach ($value->textExtra as $tag) { - $hastags[] = $tag->hashtagName; - } - $hastags_str = ''; - foreach ($hastags as $tag) { - $hastags_str .= '#' . $tag . ' '; - } + // Handle link "untracking" + $linkParts = parse_url($video->find('a', 0)->href); + $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path']; + + $image = $video->find('video', 0)->poster; + $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; + + $enclosures = [$image]; $item['uri'] = $link; - $item['title'] = $value->desc; - $item['timestamp'] = $value->createTime; - $item['author'] = '@' . $value->author; - $item['enclosures'][] = $image; - $item['categories'] = $hastags; + $item['title'] = 'Video'; + $item['author'] = '@' . $author; + $item['enclosures'] = $enclosures; $item['content'] = << -

{$views} views


Hashtags: {$hastags_str} +

{$views} views


EOD; $this->items[] = $item; @@ -91,7 +71,7 @@ EOD; { switch ($this->queriedContext) { case 'By user': - return $this->feedName . ' (' . $this->processUsername() . ') - TikTok'; + return $this->processUsername() . ' - TikTok'; default: return parent::getName(); }