From f86a0ea59da694aada7aca511bc640ef8d0fb12d Mon Sep 17 00:00:00 2001 From: Valentine Date: Thu, 1 Aug 2024 17:39:06 +0300 Subject: [PATCH] add GetTweetReplies method --- CHANGELOG.md | 6 ++++ README.md | 39 +++++++++++++++++++++++ replies.go | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ replies_test.go | 29 +++++++++++++++++ timeline_v2.go | 79 ++++++++++++++++++++++++++++++++++++++++++---- tweets.go | 2 +- 6 files changed, 232 insertions(+), 7 deletions(-) create mode 100644 replies.go create mode 100644 replies_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index b7de3ca..dce1224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v0.0.10 + +01.08.2024 + +- Added method `GetTweetReplies` + ## v0.0.9 24.07.2024 diff --git a/README.md b/README.md index 0766bec..9ff4b46 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ You can use this library to get tweets, profiles, and trends trivially. - [Log out](#log-out) - [Methods](#methods) - [Get tweet](#get-tweet) + - [Get tweet replies](#get-tweet-replies) - [Get user tweets](#get-user-tweets) - [Get user medias](#get-user-medias) - [Get bookmarks](#get-bookmarks) @@ -212,6 +213,44 @@ scraper.Logout() tweet, err := scraper.GetTweet("1328684389388185600") ``` +### Get tweet replies + +150 requests / 15 minutes + +Returns ~5-10 tweets per request and multiple cursors – one for each thread. + +```golang +var cursor string +tweets, cursors, err := scraper.GetTweetReplies("1328684389388185600", cursor) +``` + +To get all replies and replies of replies for a tweet, iterate over all cursors. To get only direct replies, check that `cursor.ThreadID` is equal to your tweet id.
+ +```golang +tweets, cursors, err := scraper.GetTweetReplies("1328684389388185600", "") +if err != nil { + panic(err) +} + +for { + if len(cursors) > 0 { + var cursor *twitterscraper.ThreadCursor + cursor, cursors = cursors[0], cursors[1:] + moreTweets, moreCursors, err := scraper.GetTweetReplies("1328684389388185600", cursor.Cursor) + if err != nil { + // you can check here if rate limited, await and repeat request + panic(err) + } + tweets = append(tweets, moreTweets...) + if len(moreCursors) > 0 { + cursors = append(cursors, moreCursors...) + } + } else { + break + } +} +``` + ### Get user tweets 150 requests / 15 minutes diff --git a/replies.go b/replies.go new file mode 100644 index 0000000..93ec8dc --- /dev/null +++ b/replies.go @@ -0,0 +1,84 @@ +package twitterscraper + +import "net/url" + +type ThreadCursor struct { + FocalTweetID string + ThreadID string + Cursor string + CursorType string +} + +func (s *Scraper) GetTweetReplies(id string, cursor string) ([]*Tweet, []*ThreadCursor, error) { + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/ldqoq5MmFHN1FhMGvzC9Jg/TweetDetail") + if err != nil { + return nil, nil, err + } + + variables := map[string]interface{}{ + "focalTweetId": id, + "referrer": "tweet", + "with_rux_injections": false, + "rankingMode": "Relevance", + "includePromotedContent": true, + "withCommunity": true, + "withQuickPromoteEligibilityTweetFields": true, + "withBirdwatchNotes": true, + "withVoice": true, + } + + if cursor != "" { + variables["cursor"] = cursor + } + + features := map[string]interface{}{ + "rweb_tipjar_consumption_enabled": true, + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "communities_web_enable_tweet_community_results_fetch": true, +
"c9s_tweet_anatomy_moderator_badge_enabled": true, + "articles_preview_enabled": true, + "tweetypie_unmention_optimization_enabled": true, + "responsive_web_edit_tweet_api_enabled": true, + "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, + "view_counts_everywhere_api_enabled": true, + "longform_notetweets_consumption_enabled": true, + "responsive_web_twitter_article_tweet_consumption_enabled": true, + "tweet_awards_web_tipping_enabled": false, + "creator_subscriptions_quote_tweet_preview_enabled": false, + "freedom_of_speech_not_reach_fetch_enabled": true, + "standardized_nudges_misinfo": true, + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true, + "rweb_video_timestamps_enabled": true, + "longform_notetweets_rich_text_read_enabled": true, + "longform_notetweets_inline_media_enabled": true, + "responsive_web_enhance_cards_enabled": false, + } + + fieldToggles := map[string]interface{}{ + "withArticleRichContentState": true, + "withArticlePlainText": false, + "withGrokAnalyze": false, + "withDisallowedReplyControls": false, + } + + query := url.Values{} + query.Set("variables", mapToJSONString(variables)) + query.Set("features", mapToJSONString(features)) + query.Set("fieldToggles", mapToJSONString(fieldToggles)) + req.URL.RawQuery = query.Encode() + + var threads threadedConversation + + err = s.RequestAPI(req, &threads) + if err != nil { + return nil, nil, err + } + + tweets, cursors := threads.parse(id) + + return tweets, cursors, nil +} diff --git a/replies_test.go b/replies_test.go new file mode 100644 index 0000000..0c0c41c --- /dev/null +++ b/replies_test.go @@ -0,0 +1,29 @@ +package twitterscraper_test + +import ( + "testing" + + twitterscraper "github.com/imperatrona/twitter-scraper" +) + +func TestGetReplies(t *testing.T) { + if skipAuthTest { + t.Skip("Skipping test due to environment variable") + } + + tweetId := "1697304622749086011" + + tweets, cursors, err := testScraper.GetTweetReplies(tweetId, "") + 
if err != nil { + t.Fatal(err) + } + + if len(tweets) < 2 { + t.Fatal("Less than 2 tweets returned") + } + + if len(cursors) < 1 { + t.Fatal("No cursors returned") + } +} + diff --git a/timeline_v2.go b/timeline_v2.go index b1df69d..a4cebbf 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -2,6 +2,7 @@ package twitterscraper import ( "strconv" + "strings" ) type tweet struct { @@ -74,12 +75,16 @@ func (result *userResult) parse() Profile { } type item struct { - Item struct { + EntryID string `json:"entryId"` + Item struct { ItemContent struct { + ItemType string `json:"itemType"` TweetDisplayType string `json:"tweetDisplayType"` TweetResults struct { Result result `json:"result"` } `json:"tweet_results"` + CursorType string `json:"cursorType"` + Value string `json:"value"` } `json:"itemContent"` } `json:"item"` } @@ -90,6 +95,7 @@ type entry struct { Value string `json:"value"` Items []item `json:"items"` ItemContent struct { + ItemType string `json:"itemType"` TweetDisplayType string `json:"tweetDisplayType"` TweetResults struct { Result result `json:"result"` @@ -98,6 +104,8 @@ type entry struct { UserResults struct { Result userResult `json:"result"` } `json:"user_results"` + CursorType string `json:"cursorType"` + Value string `json:"value"` } `json:"itemContent"` } `json:"content"` } @@ -221,16 +229,18 @@ type threadedConversation struct { Data struct { ThreadedConversationWithInjectionsV2 struct { Instructions []struct { - Type string `json:"type"` - Entries []entry `json:"entries"` - Entry entry `json:"entry"` + Type string `json:"type"` + Entry entry `json:"entry"` + Entries []entry `json:"entries"` + ModuleItems []item `json:"moduleItems"` } `json:"instructions"` } `json:"threaded_conversation_with_injections_v2"` } `json:"data"` } -func (conversation *threadedConversation) parse() []*Tweet { +func (conversation *threadedConversation) parse(focalTweetID string) ([]*Tweet, []*ThreadCursor) { var tweets []*Tweet + var cursors []*ThreadCursor for _, 
instruction := range conversation.Data.ThreadedConversationWithInjectionsV2.Instructions { for _, entry := range instruction.Entries { if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" || entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" { @@ -241,6 +251,16 @@ func (conversation *threadedConversation) parse() []*Tweet { tweets = append(tweets, tweet) } } + + if entry.Content.ItemContent.CursorType != "" && entry.Content.ItemContent.Value != "" { + cursors = append(cursors, &ThreadCursor{ + FocalTweetID: focalTweetID, + ThreadID: focalTweetID, + Cursor: entry.Content.ItemContent.Value, + CursorType: entry.Content.ItemContent.CursorType, + }) + } + for _, item := range entry.Content.Items { if item.Item.ItemContent.TweetResults.Result.Typename == "Tweet" || item.Item.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" { if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil { @@ -250,9 +270,56 @@ func (conversation *threadedConversation) parse() []*Tweet { tweets = append(tweets, tweet) } } + + if item.Item.ItemContent.CursorType != "" && item.Item.ItemContent.Value != "" { + threadID := "" + + entryId := strings.Split(item.EntryID, "-") + if len(entryId) > 1 && entryId[0] == "conversationthread" { + if i, _ := strconv.Atoi(entryId[1]); i != 0 { + threadID = entryId[1] + } + } + + cursors = append(cursors, &ThreadCursor{ + FocalTweetID: focalTweetID, + ThreadID: threadID, + Cursor: item.Item.ItemContent.Value, + CursorType: item.Item.ItemContent.CursorType, + }) + } + } + } + for _, item := range instruction.ModuleItems { + if item.Item.ItemContent.TweetResults.Result.Typename == "Tweet" || item.Item.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" { + if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil { + if item.Item.ItemContent.TweetDisplayType == "SelfThread" { + tweet.IsSelfThread = true + } + tweets = append(tweets, 
tweet) + } + } + + if item.Item.ItemContent.CursorType != "" && item.Item.ItemContent.Value != "" { + threadID := "" + + entryId := strings.Split(item.EntryID, "-") + if len(entryId) > 1 && entryId[0] == "conversationthread" { + if i, _ := strconv.Atoi(entryId[1]); i != 0 { + threadID = entryId[1] + } + } + + cursors = append(cursors, &ThreadCursor{ + FocalTweetID: focalTweetID, + ThreadID: threadID, + Cursor: item.Item.ItemContent.Value, + CursorType: item.Item.ItemContent.CursorType, + }) } } } + for _, tweet := range tweets { if tweet.InReplyToStatusID != "" { for _, parentTweet := range tweets { @@ -273,7 +340,7 @@ func (conversation *threadedConversation) parse() []*Tweet { } } } - return tweets + return tweets, cursors } type tweetResult struct { diff --git a/tweets.go b/tweets.go index 7e61fb2..e01dedd 100644 --- a/tweets.go +++ b/tweets.go @@ -199,7 +199,7 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) { return nil, err } - tweets := conversation.parse() + tweets, _ := conversation.parse(id) for _, tweet := range tweets { if tweet.ID == id { return tweet, nil