Add authentication

Breaking changes: removed WithCookie and WithXCsrfToken
This commit is contained in:
Alexander Sheiko 2023-04-23 17:32:28 +03:00
parent 3d364abaac
commit eb18988829
7 changed files with 236 additions and 33 deletions

View file

@ -168,14 +168,25 @@ func main() {
}
```
### Use cookie authentication
### Use authentication
Some users' tweets are protected: you must log in and follow those users to read them.
Cookie and xCsrfToken is optional.
Logging in is also required to use search.
```golang
scraper.WithCookie("twitter cookie after login")
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
err := scraper.Login("username", "password")
```
The login status can be checked with:
```golang
scraper.IsLoggedIn()
```
Logout (clear session):
```golang
scraper.Logout()
```
### Use Proxy

17
api.go
View file

@ -33,10 +33,11 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
req.Header.Set("Authorization", "Bearer "+s.bearerToken)
req.Header.Set("X-Guest-Token", s.guestToken)
// use cookie
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
req.Header.Set("Cookie", s.cookie)
req.Header.Set("x-csrf-token", s.xCsrfToken)
for _, cookie := range s.client.Jar.Cookies(req.URL) {
if cookie.Name == "ct0" {
req.Header.Set("X-CSRF-Token", cookie.Value)
break
}
}
resp, err := s.client.Do(req)
@ -55,7 +56,13 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
s.guestToken = ""
}
return json.NewDecoder(resp.Body).Decode(target)
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
// fmt.Println(string(b))
return json.Unmarshal(b, target)
}
// GetGuestToken from Twitter API

172
auth.go Normal file
View file

@ -0,0 +1,172 @@
package twitterscraper
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"net/http/cookiejar"
)
const (
// loginURL is the endpoint getFlowToken POSTs every login flow step to.
loginURL = "https://api.twitter.com/1.1/onboarding/task.json"
// bearerToken2 is the bearer token Login installs via setBearerToken
// before it starts the login flow.
bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)
// flow is the JSON response body that getFlowToken decodes after each
// request to the login flow endpoint.
type flow struct {
// Errors holds the errors reported in the response. getFlowToken converts
// the first entry (code and message) into a Go error.
Errors []struct {
Code int `json:"code"`
Message string `json:"message"`
} `json:"errors"`
// FlowToken is the value getFlowToken returns; Login sends it back as
// "flow_token" in the following step.
FlowToken string `json:"flow_token"`
// Status is decoded from "status"; nothing in the visible code reads it.
Status string `json:"status"`
}
// getFlowToken submits a single step of the login flow. It marshals data to
// JSON, POSTs it to loginURL with the scraper's bearer and guest tokens, and
// decodes the response into a flow value.
//
// It returns the flow token from the response. When the response carries one
// or more errors, the first one is returned as an "auth error" and the token
// is empty.
func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) {
	payload, err := json.Marshal(data)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest(http.MethodPost, loginURL, bytes.NewReader(payload))
	if err != nil {
		return "", err
	}

	// Header names below are already in canonical form, so Set stores them
	// under exactly these keys.
	req.Header.Set("Authorization", "Bearer "+s.bearerToken)
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Linux; Android 11; Nokia G20) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.88 Mobile Safari/537.36")
	req.Header.Set("X-Guest-Token", s.guestToken)
	req.Header.Set("X-Twitter-Auth-Type", "OAuth2Client")
	req.Header.Set("X-Twitter-Active-User", "yes")
	req.Header.Set("X-Twitter-Client-Language", "en")

	resp, err := s.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var result flow
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", err
	}

	// Only the first reported error is surfaced to the caller.
	if len(result.Errors) > 0 {
		first := result.Errors[0]
		return "", fmt.Errorf("auth error (%d): %v", first.Code, first.Message)
	}

	return result.FlowToken, nil
}
// IsLoggedIn reports whether the scraper is currently marked as logged in.
// The flag is set at the end of a successful Login and cleared by Logout.
func (s *Scraper) IsLoggedIn() bool {
return s.isLogged
}
// Login authenticates the scraper with the given username and password.
//
// It switches the scraper to bearerToken2, obtains a guest token, opens the
// "login" flow and then submits the follow-up subtasks one by one, feeding
// the flow token returned by each step into the next. The scraper is marked
// as logged in only when every step succeeds; the first failing step's error
// is returned unchanged.
func (s *Scraper) Login(username string, password string) error {
	s.setBearerToken(bearerToken2)

	if err := s.GetGuestToken(); err != nil {
		return err
	}

	// Opening request: starts the flow and yields the first flow token.
	flowToken, err := s.getFlowToken(map[string]interface{}{
		"flow_name": "login",
		"input_flow_data": map[string]interface{}{
			"flow_context": map[string]interface{}{
				"debug_overrides": map[string]interface{}{},
				"start_location":  map[string]interface{}{"location": "splash_screen"},
			},
		},
	})
	if err != nil {
		return err
	}

	// Follow-up subtasks, in the order they must be submitted.
	subtasks := []map[string]interface{}{
		// instrumentation step
		{
			"subtask_id":         "LoginJsInstrumentationSubtask",
			"js_instrumentation": map[string]interface{}{"response": "{}", "link": "next_link"},
		},
		// username step
		{
			"subtask_id": "LoginEnterUserIdentifierSSO",
			"settings_list": map[string]interface{}{
				"setting_responses": []map[string]interface{}{
					{
						"key":           "user_identifier",
						"response_data": map[string]interface{}{"text_data": map[string]interface{}{"result": username}},
					},
				},
				"link": "next_link",
			},
		},
		// password step
		{
			"subtask_id":     "LoginEnterPassword",
			"enter_password": map[string]interface{}{"password": password, "link": "next_link"},
		},
		// duplication check
		{
			"subtask_id":              "AccountDuplicationCheck",
			"check_logged_in_account": map[string]interface{}{"link": "AccountDuplicationCheck_false"},
		},
	}

	for _, subtask := range subtasks {
		// Each step carries the token returned by the previous one.
		flowToken, err = s.getFlowToken(map[string]interface{}{
			"flow_token":     flowToken,
			"subtask_inputs": []map[string]interface{}{subtask},
		})
		if err != nil {
			return err
		}
	}

	s.isLogged = true
	return nil
}
// Logout resets the session: it clears the logged-in flag and the guest
// token, replaces the HTTP client's cookie jar with a fresh empty one, and
// restores the default bearer token.
func (s *Scraper) Logout() {
	// The error from cookiejar.New is deliberately ignored, as before.
	freshJar, _ := cookiejar.New(nil)

	s.isLogged = false
	s.guestToken = ""
	s.client.Jar = freshJar
	s.setBearerToken(bearerToken)
}

View file

@ -5,6 +5,7 @@ import (
"errors"
"net"
"net/http"
"net/http/cookiejar"
"net/url"
"strings"
"sync"
@ -21,11 +22,9 @@ type Scraper struct {
guestToken string
guestCreatedAt time.Time
includeReplies bool
isLogged bool
searchMode SearchMode
wg sync.WaitGroup
cookie string
xCsrfToken string
}
// SearchMode type
@ -51,9 +50,13 @@ var defaultScraper *Scraper
// New creates a Scraper object
func New() *Scraper {
jar, _ := cookiejar.New(nil)
return &Scraper{
bearerToken: bearerToken,
client: &http.Client{Timeout: DefaultClientTimeout},
client: &http.Client{
Jar: jar,
Timeout: DefaultClientTimeout,
},
}
}
@ -100,18 +103,6 @@ func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b)
}
// cookie
func (s *Scraper) WithCookie(cookie string) *Scraper {
s.cookie = cookie
return s
}
// x csrf token
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
s.xCsrfToken = xcsrfToken
return s
}
// client timeout
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
s.client.Timeout = timeout

View file

@ -2,6 +2,7 @@ package twitterscraper
import (
"context"
"errors"
"strconv"
)
@ -27,6 +28,10 @@ func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-cha
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
if !s.isLogged {
return nil, errors.New("scraper is not logged in for search")
}
if maxNbr > 50 {
maxNbr = 50
}

View file

@ -2,19 +2,32 @@ package twitterscraper_test
import (
"context"
"os"
"strings"
"testing"
twitterscraper "github.com/n0madic/twitter-scraper"
)
var searchScraper = twitterscraper.New()
// authSearchScraper makes sure the shared searchScraper is logged in. When it
// is not, it logs in with the credentials from the TWITTER_USERNAME and
// TWITTER_PASSWORD environment variables and returns the result of that call.
func authSearchScraper() error {
	if !searchScraper.IsLoggedIn() {
		username := os.Getenv("TWITTER_USERNAME")
		password := os.Getenv("TWITTER_PASSWORD")
		return searchScraper.Login(username, password)
	}
	return nil
}
func TestFetchSearchCursor(t *testing.T) {
scraper := twitterscraper.New()
err := authSearchScraper()
if err != nil {
t.Fatal(err)
}
maxTweetsNbr := 150
tweetsNbr := 0
nextCursor := ""
for tweetsNbr < maxTweetsNbr {
tweets, cursor, err := scraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
tweets, cursor, err := searchScraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
if err != nil {
t.Fatal(err)
}
@ -33,8 +46,11 @@ func TestGetSearchProfiles(t *testing.T) {
count := 0
maxProfilesNbr := 150
dupcheck := make(map[string]bool)
scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
err := authSearchScraper()
if err != nil {
t.Fatal(err)
}
for profile := range searchScraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
if profile.Error != nil {
t.Error(profile.Error)
} else {
@ -59,8 +75,11 @@ func TestGetSearchTweets(t *testing.T) {
count := 0
maxTweetsNbr := 150
dupcheck := make(map[string]bool)
scraper := twitterscraper.New().WithDelay(4)
for tweet := range scraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
err := authSearchScraper()
if err != nil {
t.Fatal(err)
}
for tweet := range searchScraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
if tweet.Error != nil {
t.Error(tweet.Error)
} else {

View file

@ -2,8 +2,6 @@ package twitterscraper
import "fmt"
var bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
// GetTrends return list of trends.
func (s *Scraper) GetTrends() ([]string, error) {
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")