Authentication is required!

(quick dirty fix)
Close #115
This commit is contained in:
Alexander Sheiko 2023-07-02 01:41:48 +03:00
parent 5b0c8c3f9f
commit 07ad3789ef
9 changed files with 274 additions and 147 deletions

149
README.md
View file

@ -16,6 +16,75 @@ go get -u github.com/n0madic/twitter-scraper
## Usage
### Authentication
Authentication is required for all methods now!
#### Login
```golang
err := scraper.Login("username", "password")
```
Use username to login, not email!
But if you have email confirmation, use email address in addition:
```golang
err := scraper.Login("username", "password", "email")
```
If you have two-factor authentication, use code:
```golang
err := scraper.Login("username", "password", "code")
```
Status of login can be checked with:
```golang
scraper.IsLoggedIn()
```
Logout (clear session):
```golang
scraper.Logout()
```
If you want to save the session between restarts, you can save the cookies with `scraper.GetCookies()` and restore them with `scraper.SetCookies()`.
For example, save cookies:
```golang
cookies := scraper.GetCookies()
// serialize to JSON
js, _ := json.Marshal(cookies)
// save to file
f, _ := os.Create("cookies.json")
f.Write(js)
```
and load cookies:
```golang
f, _ := os.Open("cookies.json")
// deserialize from JSON
var cookies []*http.Cookie
json.NewDecoder(f).Decode(&cookies)
// load cookies
scraper.SetCookies(cookies)
// check login status
scraper.IsLoggedIn()
```
#### Open account
If you don't want to use your account, you can try to log in as a Twitter app:
```golang
err := scraper.LoginOpenAccount()
```
### Get user tweets
```golang
@ -29,7 +98,10 @@ import (
func main() {
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
panic(err)
}
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
if tweet.Error != nil {
panic(tweet.Error)
@ -54,6 +126,10 @@ import (
func main() {
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
panic(err)
}
tweet, err := scraper.GetTweet("1328684389388185600")
if err != nil {
panic(err)
@ -124,6 +200,7 @@ import (
func main() {
scraper := twitterscraper.New()
scraper.LoginOpenAccount()
profile, err := scraper.GetProfile("Twitter")
if err != nil {
panic(err)
@ -178,76 +255,6 @@ func main() {
}
```
### Use authentication
Some specified user tweets are protected that you must login and follow.
It is also required to search.
#### Login
```golang
err := scraper.Login("username", "password")
```
Use username to login, not email!
But if you have email confirmation, use email address in addition:
```golang
err := scraper.Login("username", "password", "email")
```
If you have two-factor authentication, use code:
```golang
err := scraper.Login("username", "password", "code")
```
Status of login can be checked with:
```golang
scraper.IsLoggedIn()
```
Logout (clear session):
```golang
scraper.Logout()
```
If you want save session between restarts, you can save cookies with `scraper.GetCookies()` and restore with `scraper.SetCookies()`.
For example, save cookies:
```golang
cookies := scraper.GetCookies()
// serialize to JSON
js, _ := json.Marshal(cookies)
// save to file
f, _ = os.Create("cookies.json")
f.Write(js)
```
and load cookies:
```golang
f, _ := os.Open("cookies.json")
// deserialize from JSON
var cookies []*http.Cookie
json.NewDecoder(f).Decode(&cookies)
// load cookies
scraper.SetCookies(cookies)
// check login status
scraper.IsLoggedIn()
```
#### Open account
If you don't want to use your account, you can login as a Twitter app:
```golang
err := scraper.LoginOpenAccount()
```
### Use Proxy
Support HTTP(s) and SOCKS5 proxy

View file

@ -289,6 +289,7 @@ func (s *Scraper) Login(credentials ...string) error {
}
s.isLogged = true
s.isOpenAccount = false
return nil
}
@ -339,6 +340,7 @@ func (s *Scraper) LoginOpenAccount() error {
s.oAuthToken = info.Subtasks[0].OpenAccount.OAuthToken
s.oAuthSecret = info.Subtasks[0].OpenAccount.OAuthTokenSecret
s.isLogged = true
s.isOpenAccount = true
}
}
return nil
@ -356,6 +358,7 @@ func (s *Scraper) Logout() error {
}
s.isLogged = false
s.isOpenAccount = false
s.guestToken = ""
s.oAuthToken = ""
s.oAuthSecret = ""

View file

@ -31,6 +31,10 @@ func TestGetProfile(t *testing.T) {
}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
profile, err := scraper.GetProfile("nomadic_ua")
if err != nil {
t.Error(err)
@ -83,6 +87,10 @@ func TestGetProfilePrivate(t *testing.T) {
}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
// some random private profile (found via google)
profile, err := scraper.GetProfile("tomdumont")
if err != nil {
@ -114,7 +122,11 @@ func TestGetProfilePrivate(t *testing.T) {
func TestGetProfileErrorSuspended(t *testing.T) {
scraper := twitterscraper.New()
_, err := scraper.GetProfile("123")
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
_, err = scraper.GetProfile("123")
if err == nil {
t.Error("Expected Error, got success")
} else {
@ -128,7 +140,11 @@ func TestGetProfileErrorNotFound(t *testing.T) {
neUser := "sample3123131"
expectedError := fmt.Sprintf("User '%s' not found", neUser)
scraper := twitterscraper.New()
_, err := scraper.GetProfile(neUser)
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
_, err = scraper.GetProfile(neUser)
if err == nil {
t.Error("Expected Error, got success")
} else {
@ -140,6 +156,10 @@ func TestGetProfileErrorNotFound(t *testing.T) {
func TestGetUserIDByScreenName(t *testing.T) {
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
userID, err := scraper.GetUserIDByScreenName("Twitter")
if err != nil {
t.Errorf("getUserByScreenName() error = %v", err)

View file

@ -23,6 +23,7 @@ type Scraper struct {
guestCreatedAt time.Time
includeReplies bool
isLogged bool
isOpenAccount bool
oAuthToken string
oAuthSecret string
proxy string

View file

@ -2,7 +2,6 @@ package twitterscraper_test
import (
"context"
"os"
"testing"
twitterscraper "github.com/n0madic/twitter-scraper"
@ -11,9 +10,6 @@ import (
var searchScraper = twitterscraper.New()
func TestFetchSearchCursor(t *testing.T) {
if os.Getenv("SKIP_AUTH_TEST") != "" {
t.Skip("Skipping test due to environment variable")
}
err := searchScraper.LoginOpenAccount()
if err != nil {
t.Fatal(err)

View file

@ -89,6 +89,7 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet {
name := timeline.GlobalObjects.Users[tweet.UserIDStr].Name
tw := &Tweet{
ID: id,
ConversationID: tweet.ConversationIDStr,
Likes: tweet.FavoriteCount,
Name: name,
PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, id),
@ -125,6 +126,10 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet {
tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
}
if tweet.SelfThread.IDStr == id {
tw.IsSelfThread = true
}
if tweet.Views.Count != "" {
views, viewsErr := strconv.Atoi(tweet.Views.Count)
if viewsErr != nil {

View file

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"net/url"
"strconv"
)
// GetTweets returns channel with tweets for a given user.
@ -18,6 +19,9 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*
return nil, "", err
}
if s.isOpenAccount {
return s.FetchTweetsByUserIDLegacy(userID, maxTweetsNbr, cursor)
}
return s.FetchTweetsByUserID(userID, maxTweetsNbr, cursor)
}
@ -83,8 +87,56 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st
return tweets, nextCursor, nil
}
// FetchTweetsByUserIDLegacy retrieves one page of tweets for the given userID
// from the legacy profile-timeline endpoint of the Twitter frontend API.
//
// maxTweetsNbr is the page size to request; values above 200 are clamped to
// 200. A non-empty cursor is forwarded to continue from a previous page.
// It returns the parsed tweets, the cursor for the following page, and any
// error raised while building or executing the request.
func (s *Scraper) FetchTweetsByUserIDLegacy(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
	// Never ask for more than 200 tweets in a single call.
	count := maxTweetsNbr
	if count > 200 {
		count = 200
	}

	endpoint := "https://api.twitter.com/2/timeline/profile/" + userID + ".json"
	req, err := s.newRequest("GET", endpoint)
	if err != nil {
		return nil, "", err
	}

	// Extend whatever query the request already carries with paging parameters.
	params := req.URL.Query()
	params.Add("count", strconv.Itoa(count))
	params.Add("userId", userID)
	if cursor != "" {
		params.Add("cursor", cursor)
	}
	req.URL.RawQuery = params.Encode()

	var legacy timelineV1
	if err := s.RequestAPI(req, &legacy); err != nil {
		return nil, "", err
	}

	tweets, next := legacy.parseTweets()
	return tweets, next, nil
}
// GetTweet get a single tweet by ID.
func (s *Scraper) GetTweet(id string) (*Tweet, error) {
if s.isOpenAccount {
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/conversation/"+id+".json")
if err != nil {
return nil, err
}
var timeline timelineV1
err = s.RequestAPI(req, &timeline)
if err != nil {
return nil, err
}
tweets, _ := timeline.parseTweets()
for _, tweet := range tweets {
if tweet.ID == id {
return tweet, nil
}
}
} else {
req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail")
if err != nil {
return nil, err
@ -153,5 +205,6 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) {
return tweet, nil
}
}
}
return nil, fmt.Errorf("tweet with ID %s not found", id)
}

View file

@ -2,6 +2,7 @@ package twitterscraper_test
import (
"context"
"os"
"testing"
"time"
@ -21,6 +22,10 @@ func TestGetTweets(t *testing.T) {
maxTweetsNbr := 300
dupcheck := make(map[string]bool)
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
for tweet := range scraper.GetTweets(context.Background(), "Twitter", maxTweetsNbr) {
if tweet.Error != nil {
t.Error(tweet.Error)
@ -73,6 +78,10 @@ func TestGetTweets(t *testing.T) {
func assertGetTweet(t *testing.T, expectedTweet *twitterscraper.Tweet) {
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
actualTweet, err := scraper.GetTweet(expectedTweet.ID)
if err != nil {
t.Error(err)
@ -124,6 +133,9 @@ func TestGetTweetWithMultiplePhotos(t *testing.T) {
}
func TestGetTweetWithGIF(t *testing.T) {
if os.Getenv("SKIP_AUTH_TEST") != "" {
t.Skip("Skipping test due to environment variable")
}
expectedTweet := twitterscraper.Tweet{
ConversationID: "1288540609310056450",
GIFs: []twitterscraper.GIF{
@ -148,6 +160,9 @@ func TestGetTweetWithGIF(t *testing.T) {
}
func TestGetTweetWithPhotoAndGIF(t *testing.T) {
if os.Getenv("SKIP_AUTH_TEST") != "" {
t.Skip("Skipping test due to environment variable")
}
expectedTweet := twitterscraper.Tweet{
ConversationID: "1580661436132757506",
GIFs: []twitterscraper.GIF{
@ -178,6 +193,10 @@ func TestTweetMentions(t *testing.T) {
Name: "David McRaney",
}}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
tweet, err := scraper.GetTweet("1554522888904101890")
if err != nil {
t.Error(err)
@ -210,6 +229,10 @@ func TestQuotedAndReply(t *testing.T) {
Username: "VsauceTwo",
}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
tweet, err := scraper.GetTweet("1237110897597976576")
if err != nil {
t.Error(err)
@ -239,6 +262,7 @@ func TestRetweet(t *testing.T) {
ConversationID: "1359151057872580612",
HTML: "Weve seen an increase in attacks against Asian communities and individuals around the world. Its important to know that this isnt new; throughout history, Asians have experienced violence and exclusion. However, their diverse lived experiences have largely been overlooked.",
ID: "1359151057872580612",
IsSelfThread: true,
Likes: 6683,
Name: "Twitter Together",
PermanentURL: "https://twitter.com/TwitterTogether/status/1359151057872580612",
@ -251,6 +275,10 @@ func TestRetweet(t *testing.T) {
Username: "TwitterTogether",
}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
tweet, err := scraper.GetTweet("1362849141248974853")
if err != nil {
t.Error(err)
@ -281,6 +309,10 @@ func TestTweetViews(t *testing.T) {
Views: 3189278,
}
scraper := twitterscraper.New()
err := scraper.LoginOpenAccount()
if err != nil {
t.Fatalf("LoginOpenAccount() error = %v", err)
}
tweet, err := scraper.GetTweet("1606055187348688896")
if err != nil {
t.Error(err)
@ -292,7 +324,14 @@ func TestTweetViews(t *testing.T) {
}
func TestTweetThread(t *testing.T) {
if os.Getenv("SKIP_AUTH_TEST") != "" {
t.Skip("Skipping test due to environment variable")
}
scraper := twitterscraper.New()
err := scraper.Login(username, password)
if err != nil {
t.Fatalf("Login() error = %v", err)
}
tweet, err := scraper.GetTweet("1665602315745673217")
if err != nil {
t.Fatal(err)

View file

@ -133,6 +133,9 @@ type (
Result *result `json:"result"`
} `json:"retweeted_status_result"`
QuotedStatusIDStr string `json:"quoted_status_id_str"`
SelfThread struct {
IDStr string `json:"id_str"`
} `json:"self_thread"`
Time time.Time `json:"time"`
UserIDStr string `json:"user_id_str"`
Views struct {