diff --git a/README.md b/README.md index 2b8ac25..7c7cdb0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,75 @@ go get -u github.com/n0madic/twitter-scraper ## Usage +### Authentication + +Authentication is required for all methods now! + +#### Login + +```golang +err := scraper.Login("username", "password") +``` + +Use username to log in, not email! +But if you have email confirmation, use email address in addition: + +```golang +err := scraper.Login("username", "password", "email") +``` + +If you have two-factor authentication, use code: + +```golang +err := scraper.Login("username", "password", "code") +``` + +Status of login can be checked with: + +```golang +scraper.IsLoggedIn() +``` + +Logout (clear session): + +```golang +scraper.Logout() +``` + +If you want to save the session between restarts, you can save cookies with `scraper.GetCookies()` and restore with `scraper.SetCookies()`. + +For example, save cookies: + +```golang +cookies := scraper.GetCookies() +// serialize to JSON +js, _ := json.Marshal(cookies) +// save to file +f, _ := os.Create("cookies.json") +f.Write(js) +``` + +and load cookies: + +```golang +f, _ := os.Open("cookies.json") +// deserialize from JSON +var cookies []*http.Cookie +json.NewDecoder(f).Decode(&cookies) +// load cookies +scraper.SetCookies(cookies) +// check login status +scraper.IsLoggedIn() +``` + +#### Open account + +If you don't want to use your account, you can try to log in as a Twitter app: + +```golang +err := scraper.LoginOpenAccount() +``` + ### Get user tweets ```golang @@ -29,7 +98,10 @@ import ( func main() { scraper := twitterscraper.New() - + err := scraper.LoginOpenAccount() + if err != nil { + panic(err) + } for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) { if tweet.Error != nil { panic(tweet.Error) @@ -54,6 +126,10 @@ import ( func main() { scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + panic(err) + } tweet, err := 
scraper.GetTweet("1328684389388185600") if err != nil { panic(err) @@ -124,6 +200,7 @@ import ( func main() { scraper := twitterscraper.New() + scraper.LoginOpenAccount() profile, err := scraper.GetProfile("Twitter") if err != nil { panic(err) @@ -178,76 +255,6 @@ func main() { } ``` -### Use authentication - -Some specified user tweets are protected that you must login and follow. -It is also required to search. - -#### Login - -```golang -err := scraper.Login("username", "password") -``` - -Use username to login, not email! -But if you have email confirmation, use email address in addition: - -```golang -err := scraper.Login("username", "password", "email") -``` - -If you have two-factor authentication, use code: - -```golang -err := scraper.Login("username", "password", "code") -``` - -Status of login can be checked with: - -```golang -scraper.IsLoggedIn() -``` - -Logout (clear session): - -```golang -scraper.Logout() -``` - -If you want save session between restarts, you can save cookies with `scraper.GetCookies()` and restore with `scraper.SetCookies()`. 
- -For example, save cookies: - -```golang -cookies := scraper.GetCookies() -// serialize to JSON -js, _ := json.Marshal(cookies) -// save to file -f, _ = os.Create("cookies.json") -f.Write(js) -``` - -and load cookies: - -```golang -f, _ := os.Open("cookies.json") -// deserialize from JSON -var cookies []*http.Cookie -json.NewDecoder(f).Decode(&cookies) -// load cookies -scraper.SetCookies(cookies) -// check login status -scraper.IsLoggedIn() -``` - -#### Open account - -If you don't want to use your account, you can login as a Twitter app: - -```golang -err := scraper.LoginOpenAccount() -``` - ### Use Proxy Support HTTP(s) and SOCKS5 proxy diff --git a/auth.go b/auth.go index 705b464..8933f7f 100644 --- a/auth.go +++ b/auth.go @@ -289,6 +289,7 @@ func (s *Scraper) Login(credentials ...string) error { } s.isLogged = true + s.isOpenAccount = false return nil } @@ -339,6 +340,7 @@ func (s *Scraper) LoginOpenAccount() error { s.oAuthToken = info.Subtasks[0].OpenAccount.OAuthToken s.oAuthSecret = info.Subtasks[0].OpenAccount.OAuthTokenSecret s.isLogged = true + s.isOpenAccount = true } } return nil @@ -356,6 +358,7 @@ func (s *Scraper) Logout() error { } s.isLogged = false + s.isOpenAccount = false s.guestToken = "" s.oAuthToken = "" s.oAuthSecret = "" diff --git a/profile_test.go b/profile_test.go index 156c184..bdde60d 100644 --- a/profile_test.go +++ b/profile_test.go @@ -31,6 +31,10 @@ func TestGetProfile(t *testing.T) { } scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } profile, err := scraper.GetProfile("nomadic_ua") if err != nil { t.Error(err) @@ -83,6 +87,10 @@ func TestGetProfilePrivate(t *testing.T) { } scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } // some random private profile (found via google) profile, err := scraper.GetProfile("tomdumont") if err != nil { @@ -114,7 +122,11 
@@ func TestGetProfilePrivate(t *testing.T) { func TestGetProfileErrorSuspended(t *testing.T) { scraper := twitterscraper.New() - _, err := scraper.GetProfile("123") + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } + _, err = scraper.GetProfile("123") if err == nil { t.Error("Expected Error, got success") } else { @@ -128,7 +140,11 @@ func TestGetProfileErrorNotFound(t *testing.T) { neUser := "sample3123131" expectedError := fmt.Sprintf("User '%s' not found", neUser) scraper := twitterscraper.New() - _, err := scraper.GetProfile(neUser) + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } + _, err = scraper.GetProfile(neUser) if err == nil { t.Error("Expected Error, got success") } else { @@ -140,6 +156,10 @@ func TestGetProfileErrorNotFound(t *testing.T) { func TestGetUserIDByScreenName(t *testing.T) { scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } userID, err := scraper.GetUserIDByScreenName("Twitter") if err != nil { t.Errorf("getUserByScreenName() error = %v", err) diff --git a/scraper.go b/scraper.go index dc23676..6a0fe3d 100644 --- a/scraper.go +++ b/scraper.go @@ -23,6 +23,7 @@ type Scraper struct { guestCreatedAt time.Time includeReplies bool isLogged bool + isOpenAccount bool oAuthToken string oAuthSecret string proxy string diff --git a/search_test.go b/search_test.go index 65c42ac..fcd03b4 100644 --- a/search_test.go +++ b/search_test.go @@ -2,7 +2,6 @@ package twitterscraper_test import ( "context" - "os" "testing" twitterscraper "github.com/n0madic/twitter-scraper" @@ -11,9 +10,6 @@ import ( var searchScraper = twitterscraper.New() func TestFetchSearchCursor(t *testing.T) { - if os.Getenv("SKIP_AUTH_TEST") != "" { - t.Skip("Skipping test due to environment variable") - } err := searchScraper.LoginOpenAccount() if err != nil { t.Fatal(err) diff --git 
a/timeline_v1.go b/timeline_v1.go index a69555a..9ae4aea 100644 --- a/timeline_v1.go +++ b/timeline_v1.go @@ -88,15 +88,16 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet { username := timeline.GlobalObjects.Users[tweet.UserIDStr].ScreenName name := timeline.GlobalObjects.Users[tweet.UserIDStr].Name tw := &Tweet{ - ID: id, - Likes: tweet.FavoriteCount, - Name: name, - PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, id), - Replies: tweet.ReplyCount, - Retweets: tweet.RetweetCount, - Text: tweet.FullText, - UserID: tweet.UserIDStr, - Username: username, + ID: id, + ConversationID: tweet.ConversationIDStr, + Likes: tweet.FavoriteCount, + Name: name, + PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, id), + Replies: tweet.ReplyCount, + Retweets: tweet.RetweetCount, + Text: tweet.FullText, + UserID: tweet.UserIDStr, + Username: username, } tm, err := time.Parse(time.RubyDate, tweet.CreatedAt) @@ -125,6 +126,10 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet { tw.RetweetedStatusID = tweet.RetweetedStatusIDStr } + if tweet.SelfThread.IDStr == id { + tw.IsSelfThread = true + } + if tweet.Views.Count != "" { views, viewsErr := strconv.Atoi(tweet.Views.Count) if viewsErr != nil { diff --git a/tweets.go b/tweets.go index 5a44f43..06438a9 100644 --- a/tweets.go +++ b/tweets.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/url" + "strconv" ) // GetTweets returns channel with tweets for a given user. @@ -18,6 +19,9 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]* return nil, "", err } + if s.isOpenAccount { + return s.FetchTweetsByUserIDLegacy(userID, maxTweetsNbr, cursor) + } return s.FetchTweetsByUserID(userID, maxTweetsNbr, cursor) } @@ -83,74 +87,123 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st return tweets, nextCursor, nil } +// FetchTweetsByUserIDLegacy gets tweets for a given userID, via the Twitter frontend legacy API. 
+func (s *Scraper) FetchTweetsByUserIDLegacy(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + if maxTweetsNbr > 200 { + maxTweetsNbr = 200 + } + + req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/profile/"+userID+".json") + if err != nil { + return nil, "", err + } + + q := req.URL.Query() + q.Add("count", strconv.Itoa(maxTweetsNbr)) + q.Add("userId", userID) + if cursor != "" { + q.Add("cursor", cursor) + } + req.URL.RawQuery = q.Encode() + + var timeline timelineV1 + err = s.RequestAPI(req, &timeline) + if err != nil { + return nil, "", err + } + + tweets, nextCursor := timeline.parseTweets() + return tweets, nextCursor, nil +} + // GetTweet get a single tweet by ID. func (s *Scraper) GetTweet(id string) (*Tweet, error) { - req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail") - if err != nil { - return nil, err - } + if s.isOpenAccount { + req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/conversation/"+id+".json") + if err != nil { + return nil, err + } - variables := map[string]interface{}{ - "focalTweetId": id, - "with_rux_injections": false, - "includePromotedContent": true, - "withCommunity": true, - "withQuickPromoteEligibilityTweetFields": true, - "withBirdwatchNotes": true, - "withVoice": true, - "withV2Timeline": true, - } + var timeline timelineV1 + err = s.RequestAPI(req, &timeline) + if err != nil { + return nil, err + } - features := map[string]interface{}{ - "rweb_lists_timeline_redesign_enabled": true, - "responsive_web_graphql_exclude_directive_enabled": true, - "verified_phone_label_enabled": false, - "creator_subscriptions_tweet_preview_api_enabled": true, - "responsive_web_graphql_timeline_navigation_enabled": true, - "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, - "tweetypie_unmention_optimization_enabled": true, - "responsive_web_edit_tweet_api_enabled": true, - 
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, - "view_counts_everywhere_api_enabled": true, - "longform_notetweets_consumption_enabled": true, - "tweet_awards_web_tipping_enabled": false, - "freedom_of_speech_not_reach_fetch_enabled": true, - "standardized_nudges_misinfo": true, - "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false, - "longform_notetweets_rich_text_read_enabled": true, - "longform_notetweets_inline_media_enabled": true, - "responsive_web_enhance_cards_enabled": false, - } + tweets, _ := timeline.parseTweets() + for _, tweet := range tweets { + if tweet.ID == id { + return tweet, nil + } + } + } else { + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail") + if err != nil { + return nil, err + } - query := url.Values{} - query.Set("variables", mapToJSONString(variables)) - query.Set("features", mapToJSONString(features)) - req.URL.RawQuery = query.Encode() + variables := map[string]interface{}{ + "focalTweetId": id, + "with_rux_injections": false, + "includePromotedContent": true, + "withCommunity": true, + "withQuickPromoteEligibilityTweetFields": true, + "withBirdwatchNotes": true, + "withVoice": true, + "withV2Timeline": true, + } - var conversation threadedConversation + features := map[string]interface{}{ + "rweb_lists_timeline_redesign_enabled": true, + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "tweetypie_unmention_optimization_enabled": true, + "responsive_web_edit_tweet_api_enabled": true, + "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, + "view_counts_everywhere_api_enabled": true, + "longform_notetweets_consumption_enabled": true, + 
"tweet_awards_web_tipping_enabled": false, + "freedom_of_speech_not_reach_fetch_enabled": true, + "standardized_nudges_misinfo": true, + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false, + "longform_notetweets_rich_text_read_enabled": true, + "longform_notetweets_inline_media_enabled": true, + "responsive_web_enhance_cards_enabled": false, + } - // Surprisingly, if bearerToken2 is not set, then animated GIFs are not - // present in the response for tweets with a GIF + a photo like this one: - // https://twitter.com/Twitter/status/1580661436132757506 - curBearerToken := s.bearerToken - if curBearerToken != bearerToken2 { - s.setBearerToken(bearerToken2) - } + query := url.Values{} + query.Set("variables", mapToJSONString(variables)) + query.Set("features", mapToJSONString(features)) + req.URL.RawQuery = query.Encode() - err = s.RequestAPI(req, &conversation) + var conversation threadedConversation - if curBearerToken != bearerToken2 { - s.setBearerToken(curBearerToken) - } + // Surprisingly, if bearerToken2 is not set, then animated GIFs are not + // present in the response for tweets with a GIF + a photo like this one: + // https://twitter.com/Twitter/status/1580661436132757506 + curBearerToken := s.bearerToken + if curBearerToken != bearerToken2 { + s.setBearerToken(bearerToken2) + } - if err != nil { - return nil, err - } + err = s.RequestAPI(req, &conversation) - tweets := conversation.parse() - for _, tweet := range tweets { - if tweet.ID == id { - return tweet, nil + if curBearerToken != bearerToken2 { + s.setBearerToken(curBearerToken) + } + + if err != nil { + return nil, err + } + + tweets := conversation.parse() + for _, tweet := range tweets { + if tweet.ID == id { + return tweet, nil + } } } return nil, fmt.Errorf("tweet with ID %s not found", id) diff --git a/tweets_test.go b/tweets_test.go index eea31ad..4b583df 100644 --- a/tweets_test.go +++ b/tweets_test.go @@ -2,6 +2,7 @@ package twitterscraper_test import ( 
"context" + "os" "testing" "time" @@ -21,6 +22,10 @@ func TestGetTweets(t *testing.T) { maxTweetsNbr := 300 dupcheck := make(map[string]bool) scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } for tweet := range scraper.GetTweets(context.Background(), "Twitter", maxTweetsNbr) { if tweet.Error != nil { t.Error(tweet.Error) @@ -73,6 +78,10 @@ func TestGetTweets(t *testing.T) { func assertGetTweet(t *testing.T, expectedTweet *twitterscraper.Tweet) { scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } actualTweet, err := scraper.GetTweet(expectedTweet.ID) if err != nil { t.Error(err) @@ -124,6 +133,9 @@ func TestGetTweetWithMultiplePhotos(t *testing.T) { } func TestGetTweetWithGIF(t *testing.T) { + if os.Getenv("SKIP_AUTH_TEST") != "" { + t.Skip("Skipping test due to environment variable") + } expectedTweet := twitterscraper.Tweet{ ConversationID: "1288540609310056450", GIFs: []twitterscraper.GIF{ @@ -148,6 +160,9 @@ func TestGetTweetWithGIF(t *testing.T) { } func TestGetTweetWithPhotoAndGIF(t *testing.T) { + if os.Getenv("SKIP_AUTH_TEST") != "" { + t.Skip("Skipping test due to environment variable") + } expectedTweet := twitterscraper.Tweet{ ConversationID: "1580661436132757506", GIFs: []twitterscraper.GIF{ @@ -178,6 +193,10 @@ func TestTweetMentions(t *testing.T) { Name: "David McRaney", }} scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } tweet, err := scraper.GetTweet("1554522888904101890") if err != nil { t.Error(err) @@ -210,6 +229,10 @@ func TestQuotedAndReply(t *testing.T) { Username: "VsauceTwo", } scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } tweet, err := scraper.GetTweet("1237110897597976576") if err != nil 
{ t.Error(err) @@ -239,6 +262,7 @@ func TestRetweet(t *testing.T) { ConversationID: "1359151057872580612", HTML: "We’ve seen an increase in attacks against Asian communities and individuals around the world. It’s important to know that this isn’t new; throughout history, Asians have experienced violence and exclusion. However, their diverse lived experiences have largely been overlooked.", ID: "1359151057872580612", + IsSelfThread: true, Likes: 6683, Name: "Twitter Together", PermanentURL: "https://twitter.com/TwitterTogether/status/1359151057872580612", @@ -251,6 +275,10 @@ func TestRetweet(t *testing.T) { Username: "TwitterTogether", } scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } tweet, err := scraper.GetTweet("1362849141248974853") if err != nil { t.Error(err) @@ -281,6 +309,10 @@ func TestTweetViews(t *testing.T) { Views: 3189278, } scraper := twitterscraper.New() + err := scraper.LoginOpenAccount() + if err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } tweet, err := scraper.GetTweet("1606055187348688896") if err != nil { t.Error(err) @@ -292,7 +324,14 @@ func TestTweetViews(t *testing.T) { } func TestTweetThread(t *testing.T) { + if os.Getenv("SKIP_AUTH_TEST") != "" { + t.Skip("Skipping test due to environment variable") + } scraper := twitterscraper.New() + err := scraper.Login(username, password) + if err != nil { + t.Fatalf("Login() error = %v", err) + } tweet, err := scraper.GetTweet("1665602315745673217") if err != nil { t.Fatal(err) diff --git a/types.go b/types.go index c1ea306..344d80c 100644 --- a/types.go +++ b/types.go @@ -132,10 +132,13 @@ type ( RetweetedStatusResult struct { Result *result `json:"result"` } `json:"retweeted_status_result"` - QuotedStatusIDStr string `json:"quoted_status_id_str"` - Time time.Time `json:"time"` - UserIDStr string `json:"user_id_str"` - Views struct { + QuotedStatusIDStr string `json:"quoted_status_id_str"` 
+ SelfThread struct { + IDStr string `json:"id_str"` + } `json:"self_thread"` + Time time.Time `json:"time"` + UserIDStr string `json:"user_id_str"` + Views struct { State string `json:"state"` Count string `json:"count"` } `json:"ext_views"`