diff --git a/README.md b/README.md index 1612897..3a7e53a 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ You can use this library to get tweets, profiles, and trends trivially. - [Get user tweets](#get-user-tweets) - [Get user medias](#get-user-medias) - [Get bookmarks](#get-bookmarks) + - [Get home tweets](#get-home-tweets) + - [Get foryou tweets](#get-foryou-tweets) - [Search tweets](#search-tweets) - [Search params](#search-params) - [Get profile](#get-profile) @@ -270,6 +272,56 @@ var cursor string tweets, cursor, err := scraper.FetchBookmarks(20, cursor) ``` +### Get home tweets + +> [!IMPORTANT] +> Requires authentication! + +500 requests / 15 minutes + +`GetHomeTweets` returns a channel with the specified number of latest home tweets. It’s using the `FetchHomeTweets` method under the hood. + +```golang +for tweet := range scraper.GetHomeTweets(context.Background(), 50) { + if tweet.Error != nil { + panic(tweet.Error) + } + fmt.Println(tweet.Text) +} +``` + +`FetchHomeTweets` returns latest home tweets and cursor for fetching the next page. Each request returns up to 20 tweets. + +```golang +var cursor string +tweets, cursor, err := scraper.FetchHomeTweets(20, cursor) +``` + +### Get foryou tweets + +> [!IMPORTANT] +> Requires authentication! + +500 requests / 15 minutes + +`GetForYouTweets` returns a channel with the specified number of tweets from the "For You" home timeline. It’s using the `FetchForYouTweets` method under the hood. + +```golang +for tweet := range scraper.GetForYouTweets(context.Background(), 50) { + if tweet.Error != nil { + panic(tweet.Error) + } + fmt.Println(tweet.Text) +} +``` + +`FetchForYouTweets` returns tweets from the "For You" home timeline and cursor for fetching the next page. Each request returns up to 20 tweets. 
+ +```golang +var cursor string +tweets, cursor, err := scraper.FetchForYouTweets(20, cursor) +``` + ### Search tweets > [!IMPORTANT] diff --git a/tweets.go b/tweets.go index 618cd20..7e61fb2 100644 --- a/tweets.go +++ b/tweets.go @@ -273,3 +273,206 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) { } return nil, fmt.Errorf("tweet with ID %s not found", id) } + +type homeEntry struct { + EntryId string `json:"entryId"` + SortIndex string `json:"sortIndex"` + Content struct { + EntryType string `json:"entryType"` + ItemContent struct { + ItemType string `json:"itemType"` + TweetResults struct { + Result result `json:"result"` + } `json:"tweet_results"` + } `json:"itemContent"` + Cursor string `json:"value"` + CursorType string `json:"cursorType"` + } `json:"content"` +} + +// timeline v2 JSON object +type homeTimeline struct { + Data struct { + Home struct { + HomeTimeline struct { + Instructions []struct { + Entries []homeEntry `json:"entries"` + Type string `json:"type"` + } `json:"instructions"` + Metadata struct { + SribeConfig []struct { + Page string `json:"page"` + } `json:"scribe_config"` + } `json:"metadata"` + } `json:"home_timeline_urt"` + } `json:"home"` + } `json:"data"` +} + +func (timeline *homeTimeline) parseTweets() ([]*Tweet, string) { + var cursor string + var tweets []*Tweet + for _, instruction := range timeline.Data.Home.HomeTimeline.Instructions { + for _, entry := range instruction.Entries { + if entry.Content.CursorType == "Bottom" { + cursor = entry.Content.Cursor + } else if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" { + if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil { + tweets = append(tweets, tweet) + } + } + } + } + return tweets, cursor +} + +// GetHomeTweets returns channel with tweets from home timeline +func (s *Scraper) GetHomeTweets(ctx context.Context, maxTweetsNbr int) <-chan *TweetResult { + return getTweetTimeline(ctx, "", maxTweetsNbr, s.fetchHomeTweets) +} + 
+func (s *Scraper) FetchHomeTweets(maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + return s.fetchHomeTweets("", maxTweetsNbr, cursor) +} + +// fetchHomeTweets gets tweets from the home timeline, via the Twitter frontend API. +func (s *Scraper) fetchHomeTweets(_ string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + if maxTweetsNbr > 200 { + maxTweetsNbr = 200 + } + + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/9EwYy8pLBOSFlEoSP2STiQ/HomeLatestTimeline") + if err != nil { + return nil, "", err + } + + variables := map[string]interface{}{ + "count": maxTweetsNbr, + "includePromotedContent": true, + "withQuickPromoteEligibilityTweetFields": true, + "requestContext": "launch", + } + + if cursor != "" { + variables["cursor"] = cursor + } + + features := map[string]interface{}{ + "rweb_tipjar_consumption_enabled": true, + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "communities_web_enable_tweet_community_results_fetch": true, + "c9s_tweet_anatomy_moderator_badge_enabled": true, + "articles_preview_enabled": true, + "tweetypie_unmention_optimization_enabled": true, + "responsive_web_edit_tweet_api_enabled": true, + "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, + "view_counts_everywhere_api_enabled": true, + "longform_notetweets_consumption_enabled": true, + "responsive_web_twitter_article_tweet_consumption_enabled": true, + "tweet_awards_web_tipping_enabled": false, + "creator_subscriptions_quote_tweet_preview_enabled": false, + "freedom_of_speech_not_reach_fetch_enabled": true, + "standardized_nudges_misinfo": true, + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true, + "rweb_video_timestamps_enabled": true, + 
"longform_notetweets_rich_text_read_enabled": true, + "longform_notetweets_inline_media_enabled": true, + "responsive_web_enhance_cards_enabled": false, + } + + req.Header.Set("content-type", "application/json") + + query := url.Values{} + query.Set("variables", mapToJSONString(variables)) + query.Set("features", mapToJSONString(features)) + req.URL.RawQuery = query.Encode() + + var timeline homeTimeline + err = s.RequestAPI(req, &timeline) + if err != nil { + return nil, "", err + } + + tweets, nextCursor := timeline.parseTweets() + return tweets, nextCursor, nil +} + +// GetForYouTweets returns a channel with tweets from the "For You" timeline +func (s *Scraper) GetForYouTweets(ctx context.Context, maxTweetsNbr int) <-chan *TweetResult { + return getTweetTimeline(ctx, "", maxTweetsNbr, s.fetchForYouTweets) +} + +func (s *Scraper) FetchForYouTweets(maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + return s.fetchForYouTweets("", maxTweetsNbr, cursor) +} + +// fetchForYouTweets gets tweets from the "For You" timeline, via the Twitter frontend API. 
+func (s *Scraper) fetchForYouTweets(_ string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + if maxTweetsNbr > 200 { + maxTweetsNbr = 200 + } + + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/1u0Wlkw6Ru1NwBUD-pDiww/HomeTimeline") + if err != nil { + return nil, "", err + } + + variables := map[string]interface{}{ + "count": maxTweetsNbr, + "includePromotedContent": true, + "latestControlAvailable": true, + "requestContext": "launch", + "withCommunity": true, + } + + if cursor != "" { + variables["cursor"] = cursor + } + + features := map[string]interface{}{ + "rweb_tipjar_consumption_enabled": true, + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "communities_web_enable_tweet_community_results_fetch": true, + "c9s_tweet_anatomy_moderator_badge_enabled": true, + "articles_preview_enabled": true, + "tweetypie_unmention_optimization_enabled": true, + "responsive_web_edit_tweet_api_enabled": true, + "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, + "view_counts_everywhere_api_enabled": true, + "longform_notetweets_consumption_enabled": true, + "responsive_web_twitter_article_tweet_consumption_enabled": true, + "tweet_awards_web_tipping_enabled": false, + "creator_subscriptions_quote_tweet_preview_enabled": false, + "freedom_of_speech_not_reach_fetch_enabled": true, + "standardized_nudges_misinfo": true, + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true, + "rweb_video_timestamps_enabled": true, + "longform_notetweets_rich_text_read_enabled": true, + "longform_notetweets_inline_media_enabled": true, + "responsive_web_enhance_cards_enabled": false, + } + + req.Header.Set("content-type", "application/json") + + query := url.Values{} + 
query.Set("variables", mapToJSONString(variables)) + query.Set("features", mapToJSONString(features)) + req.URL.RawQuery = query.Encode() + + var timeline homeTimeline + err = s.RequestAPI(req, &timeline) + if err != nil { + return nil, "", err + } + + tweets, nextCursor := timeline.parseTweets() + return tweets, nextCursor, nil +} diff --git a/tweets_test.go b/tweets_test.go index dc388d8..910ed14 100644 --- a/tweets_test.go +++ b/tweets_test.go @@ -306,3 +306,149 @@ func TestTweetThread(t *testing.T) { } } } + +func TestFetchHomeTweets(t *testing.T) { + if skipAuthTest { + t.Skip("Skipping test due to environment variable") + } + tweets, _, err := testScraper.FetchHomeTweets(20, "") + if err != nil { + t.Fatal(err) + } + + if len(tweets) < 1 { + t.Fatal("returned 0 tweets") + } +} + +func TestGetHomeTweets(t *testing.T) { + if skipAuthTest { + t.Skip("Skipping test due to environment variable") + } + count := 0 + maxTweetsNbr := 150 + dupcheck := make(map[string]bool) + + for tweet := range testScraper.GetHomeTweets(context.Background(), maxTweetsNbr) { + if tweet.Error != nil { + t.Error(tweet.Error) + } else { + count++ + if tweet.ID == "" { + t.Error("Expected tweet ID is empty") + } else { + if dupcheck[tweet.ID] { + t.Errorf("Detect duplicated tweet ID: %s", tweet.ID) + } else { + dupcheck[tweet.ID] = true + } + } + if tweet.UserID == "" { + t.Error("Expected tweet UserID is empty") + } + if tweet.Username == "" { + t.Error("Expected tweet Username is empty") + } + if tweet.PermanentURL == "" { + t.Error("Expected tweet PermanentURL is empty") + } + if tweet.Text == "" { + t.Error("Expected tweet Text is empty") + } + if tweet.TimeParsed.IsZero() { + t.Error("Expected tweet TimeParsed is zero") + } + if tweet.Timestamp == 0 { + t.Error("Expected tweet Timestamp is greater than zero") + } + for _, video := range tweet.Videos { + if video.ID == "" { + t.Error("Expected tweet video ID is empty") + } + if video.Preview == "" { + t.Error("Expected tweet video 
Preview is empty") + } + if video.URL == "" { + t.Error("Expected tweet video URL is empty") + } + } + } + } + + if count != maxTweetsNbr { + t.Errorf("Expected tweets count=%v, got: %v", maxTweetsNbr, count) + } +} + +func TestFetchForYouTweets(t *testing.T) { + if skipAuthTest { + t.Skip("Skipping test due to environment variable") + } + tweets, _, err := testScraper.FetchForYouTweets(20, "") + if err != nil { + t.Fatal(err) + } + + if len(tweets) < 1 { + t.Fatal("returned 0 tweets") + } +} + +func TestGetForYouTweets(t *testing.T) { + if skipAuthTest { + t.Skip("Skipping test due to environment variable") + } + count := 0 + maxTweetsNbr := 150 + dupcheck := make(map[string]bool) + + for tweet := range testScraper.GetForYouTweets(context.Background(), maxTweetsNbr) { + if tweet.Error != nil { + t.Error(tweet.Error) + } else { + count++ + if tweet.ID == "" { + t.Error("Expected tweet ID is empty") + } else { + if dupcheck[tweet.ID] { + t.Errorf("Detect duplicated tweet ID: %s", tweet.ID) + } else { + dupcheck[tweet.ID] = true + } + } + if tweet.UserID == "" { + t.Error("Expected tweet UserID is empty") + } + if tweet.Username == "" { + t.Error("Expected tweet Username is empty") + } + if tweet.PermanentURL == "" { + t.Error("Expected tweet PermanentURL is empty") + } + if tweet.Text == "" { + t.Error("Expected tweet Text is empty") + } + if tweet.TimeParsed.IsZero() { + t.Error("Expected tweet TimeParsed is zero") + } + if tweet.Timestamp == 0 { + t.Error("Expected tweet Timestamp is greater than zero") + } + for _, video := range tweet.Videos { + if video.ID == "" { + t.Error("Expected tweet video ID is empty") + } + if video.Preview == "" { + t.Error("Expected tweet video Preview is empty") + } + if video.URL == "" { + t.Error("Expected tweet video URL is empty") + } + } + } + } + + if count != maxTweetsNbr { + t.Errorf("Expected tweets count=%v, got: %v", maxTweetsNbr, count) + } +}