Add authentication

Breaking changes: removed WithCookie and WithXCsrfToken
This commit is contained in:
Alexander Sheiko 2023-04-23 17:32:28 +03:00
parent 3d364abaac
commit eb18988829
7 changed files with 236 additions and 33 deletions

View file

@ -168,14 +168,25 @@ func main() {
} }
``` ```
### Use cookie authentication ### Use authentication
Some specified user tweets are protected that you must login and follow. Some specified user tweets are protected that you must login and follow.
Cookie and xCsrfToken is optional. It is also required to search.
```golang ```golang
scraper.WithCookie("twitter cookie after login") err := scraper.Login("username", "password")
scraper.WithXCsrfToken("twitter X-Csrf-Token after login") ```
Status of login can be checked with:
```golang
scraper.IsLoggedIn()
```
Logout (clear session):
```golang
scraper.Logout()
``` ```
### Use Proxy ### Use Proxy

17
api.go
View file

@ -33,10 +33,11 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
req.Header.Set("Authorization", "Bearer "+s.bearerToken) req.Header.Set("Authorization", "Bearer "+s.bearerToken)
req.Header.Set("X-Guest-Token", s.guestToken) req.Header.Set("X-Guest-Token", s.guestToken)
// use cookie for _, cookie := range s.client.Jar.Cookies(req.URL) {
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 { if cookie.Name == "ct0" {
req.Header.Set("Cookie", s.cookie) req.Header.Set("X-CSRF-Token", cookie.Value)
req.Header.Set("x-csrf-token", s.xCsrfToken) break
}
} }
resp, err := s.client.Do(req) resp, err := s.client.Do(req)
@ -55,7 +56,13 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
s.guestToken = "" s.guestToken = ""
} }
return json.NewDecoder(resp.Body).Decode(target) b, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
// fmt.Println(string(b))
return json.Unmarshal(b, target)
} }
// GetGuestToken from Twitter API // GetGuestToken from Twitter API

172
auth.go Normal file
View file

@ -0,0 +1,172 @@
package twitterscraper
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"net/http/cookiejar"
)
// loginURL is the endpoint that getFlowToken POSTs every login-flow step to.
const loginURL = "https://api.twitter.com/1.1/onboarding/task.json"

// bearerToken2 is the bearer token that Login installs (via setBearerToken)
// before it starts the login flow.
const bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
// flow is the part of the login endpoint's JSON response that getFlowToken
// decodes.
type flow struct {
// Errors mirrors the response's "errors" array. getFlowToken turns the first
// entry into an error when the array is non-empty.
Errors []struct {
Code int `json:"code"`
Message string `json:"message"`
} `json:"errors"`
// FlowToken is what getFlowToken returns; Login sends it back as
// "flow_token" in the following step.
FlowToken string `json:"flow_token"`
// Status is decoded from "status" but is not read by the code visible here.
Status string `json:"status"`
}
// getFlowToken sends one step of the login flow and returns the flow token
// contained in the reply.
//
// data is marshalled to JSON and POSTed to loginURL using the scraper's
// current bearer and guest tokens. An error is returned when the payload
// cannot be encoded, the request fails, the body is not valid JSON, the reply
// carries an "errors" array (only its first entry is reported), or the server
// answers with an HTTP status of 400 or above.
func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) {
	jsonData, err := json.Marshal(data)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest(http.MethodPost, loginURL, bytes.NewReader(jsonData))
	if err != nil {
		return "", err
	}
	req.Header = http.Header{
		"Authorization":             []string{"Bearer " + s.bearerToken},
		"Content-Type":              []string{"application/json"},
		"User-Agent":                []string{"Mozilla/5.0 (Linux; Android 11; Nokia G20) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.88 Mobile Safari/537.36"},
		"X-Guest-Token":             []string{s.guestToken},
		"X-Twitter-Auth-Type":       []string{"OAuth2Client"},
		"X-Twitter-Active-User":     []string{"yes"},
		"X-Twitter-Client-Language": []string{"en"},
	}

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var info flow
	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
		// A non-JSON body would otherwise surface as a bare syntax error;
		// include the HTTP status so the failure is diagnosable.
		return "", fmt.Errorf("decoding login flow response (HTTP %d): %w", resp.StatusCode, err)
	}

	// Errors reported in the body take priority: they carry the API's own
	// code and message.
	if len(info.Errors) > 0 {
		return "", fmt.Errorf("auth error (%d): %v", info.Errors[0].Code, info.Errors[0].Message)
	}

	// Client.Do does not treat an error status as a Go error, so reject it
	// here instead of handing an empty token to the next step.
	if resp.StatusCode >= http.StatusBadRequest {
		return "", fmt.Errorf("login flow request failed: %s", resp.Status)
	}

	return info.FlowToken, nil
}
// IsLoggedIn reports whether the scraper is currently marked as logged in.
// The flag is set at the end of a successful Login and cleared by Logout.
func (s *Scraper) IsLoggedIn() bool {
return s.isLogged
}
// Login signs the scraper in with the given username and password.
//
// It installs bearerToken2, obtains a guest token, opens the "login" flow and
// then answers its subtasks one after another, feeding the flow token returned
// by each step into the next. The scraper is marked as logged in only after
// the last step succeeds; the first error encountered is returned unchanged.
// The bearer token switch is not undone when a step fails.
func (s *Scraper) Login(username string, password string) error {
	s.setBearerToken(bearerToken2)

	if err := s.GetGuestToken(); err != nil {
		return err
	}

	// Opening request: starts the flow and yields the first flow token.
	token, err := s.getFlowToken(map[string]interface{}{
		"flow_name": "login",
		"input_flow_data": map[string]interface{}{
			"flow_context": map[string]interface{}{
				"debug_overrides": map[string]interface{}{},
				"start_location":  map[string]interface{}{"location": "splash_screen"},
			},
		},
	})
	if err != nil {
		return err
	}

	// Subtask answers, in the order they are submitted.
	subtasks := []map[string]interface{}{
		// instrumentation step
		{
			"subtask_id":         "LoginJsInstrumentationSubtask",
			"js_instrumentation": map[string]interface{}{"response": "{}", "link": "next_link"},
		},
		// username step
		{
			"subtask_id": "LoginEnterUserIdentifierSSO",
			"settings_list": map[string]interface{}{
				"setting_responses": []map[string]interface{}{
					{
						"key":           "user_identifier",
						"response_data": map[string]interface{}{"text_data": map[string]interface{}{"result": username}},
					},
				},
				"link": "next_link",
			},
		},
		// password step
		{
			"subtask_id":     "LoginEnterPassword",
			"enter_password": map[string]interface{}{"password": password, "link": "next_link"},
		},
		// duplication check
		{
			"subtask_id":              "AccountDuplicationCheck",
			"check_logged_in_account": map[string]interface{}{"link": "AccountDuplicationCheck_false"},
		},
	}

	for _, subtask := range subtasks {
		token, err = s.getFlowToken(map[string]interface{}{
			"flow_token":     token,
			"subtask_inputs": []map[string]interface{}{subtask},
		})
		if err != nil {
			return err
		}
	}

	s.isLogged = true
	return nil
}
// Logout resets the session: it swaps in an empty cookie jar, clears the guest
// token and the logged-in flag, and reinstalls the default bearer token.
func (s *Scraper) Logout() {
	// The error from cookiejar.New is deliberately discarded, as Logout has
	// no error return to report it through.
	freshJar, _ := cookiejar.New(nil)
	s.client.Jar = freshJar

	s.guestToken = ""
	s.isLogged = false

	s.setBearerToken(bearerToken)
}

View file

@ -5,6 +5,7 @@ import (
"errors" "errors"
"net" "net"
"net/http" "net/http"
"net/http/cookiejar"
"net/url" "net/url"
"strings" "strings"
"sync" "sync"
@ -21,11 +22,9 @@ type Scraper struct {
guestToken string guestToken string
guestCreatedAt time.Time guestCreatedAt time.Time
includeReplies bool includeReplies bool
isLogged bool
searchMode SearchMode searchMode SearchMode
wg sync.WaitGroup wg sync.WaitGroup
cookie string
xCsrfToken string
} }
// SearchMode type // SearchMode type
@ -51,9 +50,13 @@ var defaultScraper *Scraper
// New creates a Scraper object // New creates a Scraper object
func New() *Scraper { func New() *Scraper {
jar, _ := cookiejar.New(nil)
return &Scraper{ return &Scraper{
bearerToken: bearerToken, bearerToken: bearerToken,
client: &http.Client{Timeout: DefaultClientTimeout}, client: &http.Client{
Jar: jar,
Timeout: DefaultClientTimeout,
},
} }
} }
@ -100,18 +103,6 @@ func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b) return defaultScraper.WithReplies(b)
} }
// cookie
func (s *Scraper) WithCookie(cookie string) *Scraper {
s.cookie = cookie
return s
}
// x csrf token
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
s.xCsrfToken = xcsrfToken
return s
}
// client timeout // client timeout
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper { func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
s.client.Timeout = timeout s.client.Timeout = timeout

View file

@ -2,6 +2,7 @@ package twitterscraper
import ( import (
"context" "context"
"errors"
"strconv" "strconv"
) )
@ -27,6 +28,10 @@ func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-cha
// getSearchTimeline gets results for a given search query, via the Twitter frontend API // getSearchTimeline gets results for a given search query, via the Twitter frontend API
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) { func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
if !s.isLogged {
return nil, errors.New("scraper is not logged in for search")
}
if maxNbr > 50 { if maxNbr > 50 {
maxNbr = 50 maxNbr = 50
} }

View file

@ -2,19 +2,32 @@ package twitterscraper_test
import ( import (
"context" "context"
"os"
"strings" "strings"
"testing" "testing"
twitterscraper "github.com/n0madic/twitter-scraper" twitterscraper "github.com/n0madic/twitter-scraper"
) )
var searchScraper = twitterscraper.New()
// authSearchScraper makes sure the shared searchScraper is logged in before a
// search test runs. When it is not, it logs in with the credentials taken from
// the TWITTER_USERNAME and TWITTER_PASSWORD environment variables and returns
// the result of that attempt; otherwise it returns nil without doing anything.
func authSearchScraper() error {
	if !searchScraper.IsLoggedIn() {
		return searchScraper.Login(os.Getenv("TWITTER_USERNAME"), os.Getenv("TWITTER_PASSWORD"))
	}
	return nil
}
func TestFetchSearchCursor(t *testing.T) { func TestFetchSearchCursor(t *testing.T) {
scraper := twitterscraper.New() err := authSearchScraper()
if err != nil {
t.Fatal(err)
}
maxTweetsNbr := 150 maxTweetsNbr := 150
tweetsNbr := 0 tweetsNbr := 0
nextCursor := "" nextCursor := ""
for tweetsNbr < maxTweetsNbr { for tweetsNbr < maxTweetsNbr {
tweets, cursor, err := scraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor) tweets, cursor, err := searchScraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -33,8 +46,11 @@ func TestGetSearchProfiles(t *testing.T) {
count := 0 count := 0
maxProfilesNbr := 150 maxProfilesNbr := 150
dupcheck := make(map[string]bool) dupcheck := make(map[string]bool)
scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers) err := authSearchScraper()
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) { if err != nil {
t.Fatal(err)
}
for profile := range searchScraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
if profile.Error != nil { if profile.Error != nil {
t.Error(profile.Error) t.Error(profile.Error)
} else { } else {
@ -59,8 +75,11 @@ func TestGetSearchTweets(t *testing.T) {
count := 0 count := 0
maxTweetsNbr := 150 maxTweetsNbr := 150
dupcheck := make(map[string]bool) dupcheck := make(map[string]bool)
scraper := twitterscraper.New().WithDelay(4) err := authSearchScraper()
for tweet := range scraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) { if err != nil {
t.Fatal(err)
}
for tweet := range searchScraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
if tweet.Error != nil { if tweet.Error != nil {
t.Error(tweet.Error) t.Error(tweet.Error)
} else { } else {

View file

@ -2,8 +2,6 @@ package twitterscraper
import "fmt" import "fmt"
var bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
// GetTrends return list of trends. // GetTrends return list of trends.
func (s *Scraper) GetTrends() ([]string, error) { func (s *Scraper) GetTrends() ([]string, error) {
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json") req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")