Add authentication
Breaking changes: removed WithCookie and WithXCsrfToken
This commit is contained in:
parent
3d364abaac
commit
eb18988829
7 changed files with 236 additions and 33 deletions
19
README.md
19
README.md
|
|
@ -168,14 +168,25 @@ func main() {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use cookie authentication
|
### Use authentication
|
||||||
|
|
||||||
Some users' tweets are protected, so you must log in and follow those users to read them.
|
Some users' tweets are protected, so you must log in and follow those users to read them.
|
||||||
Cookie and xCsrfToken is optional.
|
Logging in is also required to use search.
|
||||||
|
|
||||||
```golang
|
```golang
|
||||||
scraper.WithCookie("twitter cookie after login")
|
err := scraper.Login("username", "password")
|
||||||
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
|
```
|
||||||
|
|
||||||
|
The login status can be checked with:
|
||||||
|
|
||||||
|
```golang
|
||||||
|
scraper.IsLoggedIn()
|
||||||
|
```
|
||||||
|
|
||||||
|
Logout (clear session):
|
||||||
|
|
||||||
|
```golang
|
||||||
|
scraper.Logout()
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use Proxy
|
### Use Proxy
|
||||||
|
|
|
||||||
17
api.go
17
api.go
|
|
@ -33,10 +33,11 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||||
req.Header.Set("Authorization", "Bearer "+s.bearerToken)
|
req.Header.Set("Authorization", "Bearer "+s.bearerToken)
|
||||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||||
|
|
||||||
// use cookie
|
for _, cookie := range s.client.Jar.Cookies(req.URL) {
|
||||||
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
|
if cookie.Name == "ct0" {
|
||||||
req.Header.Set("Cookie", s.cookie)
|
req.Header.Set("X-CSRF-Token", cookie.Value)
|
||||||
req.Header.Set("x-csrf-token", s.xCsrfToken)
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := s.client.Do(req)
|
resp, err := s.client.Do(req)
|
||||||
|
|
@ -55,7 +56,13 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||||
s.guestToken = ""
|
s.guestToken = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
return json.NewDecoder(resp.Body).Decode(target)
|
b, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// fmt.Println(string(b))
|
||||||
|
|
||||||
|
return json.Unmarshal(b, target)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetGuestToken from Twitter API
|
// GetGuestToken from Twitter API
|
||||||
|
|
|
||||||
172
auth.go
Normal file
172
auth.go
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/cookiejar"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
loginURL = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||||
|
bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||||
|
)
|
||||||
|
|
||||||
|
type flow struct {
|
||||||
|
Errors []struct {
|
||||||
|
Code int `json:"code"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
} `json:"errors"`
|
||||||
|
FlowToken string `json:"flow_token"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) {
|
||||||
|
headers := http.Header{
|
||||||
|
"Authorization": []string{"Bearer " + s.bearerToken},
|
||||||
|
"Content-Type": []string{"application/json"},
|
||||||
|
"User-Agent": []string{"Mozilla/5.0 (Linux; Android 11; Nokia G20) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.88 Mobile Safari/537.36"},
|
||||||
|
"X-Guest-Token": []string{s.guestToken},
|
||||||
|
"X-Twitter-Auth-Type": []string{"OAuth2Client"},
|
||||||
|
"X-Twitter-Active-User": []string{"yes"},
|
||||||
|
"X-Twitter-Client-Language": []string{"en"},
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonData, err := json.Marshal(data)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest("POST", loginURL, bytes.NewReader(jsonData))
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
req.Header = headers
|
||||||
|
|
||||||
|
resp, err := s.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
var info flow
|
||||||
|
err = json.NewDecoder(resp.Body).Decode(&info)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(info.Errors) > 0 {
|
||||||
|
return "", fmt.Errorf("auth error (%d): %v", info.Errors[0].Code, info.Errors[0].Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
return info.FlowToken, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsLoggedIn reports whether the scraper is logged in.
|
||||||
|
func (s *Scraper) IsLoggedIn() bool {
|
||||||
|
return s.isLogged
|
||||||
|
}
|
||||||
|
|
||||||
|
// Login to Twitter
|
||||||
|
func (s *Scraper) Login(username string, password string) error {
|
||||||
|
s.setBearerToken(bearerToken2)
|
||||||
|
|
||||||
|
err := s.GetGuestToken()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// flow start
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"flow_name": "login",
|
||||||
|
"input_flow_data": map[string]interface{}{
|
||||||
|
"flow_context": map[string]interface{}{
|
||||||
|
"debug_overrides": map[string]interface{}{},
|
||||||
|
"start_location": map[string]interface{}{"location": "splash_screen"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
flowToken, err := s.getFlowToken(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// flow instrumentation step
|
||||||
|
data = map[string]interface{}{
|
||||||
|
"flow_token": flowToken,
|
||||||
|
"subtask_inputs": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"subtask_id": "LoginJsInstrumentationSubtask",
|
||||||
|
"js_instrumentation": map[string]interface{}{"response": "{}", "link": "next_link"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
flowToken, err = s.getFlowToken(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// flow username step
|
||||||
|
data = map[string]interface{}{
|
||||||
|
"flow_token": flowToken,
|
||||||
|
"subtask_inputs": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"subtask_id": "LoginEnterUserIdentifierSSO",
|
||||||
|
"settings_list": map[string]interface{}{
|
||||||
|
"setting_responses": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"key": "user_identifier",
|
||||||
|
"response_data": map[string]interface{}{"text_data": map[string]interface{}{"result": username}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"link": "next_link",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
flowToken, err = s.getFlowToken(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// flow password step
|
||||||
|
data = map[string]interface{}{
|
||||||
|
"flow_token": flowToken,
|
||||||
|
"subtask_inputs": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"subtask_id": "LoginEnterPassword",
|
||||||
|
"enter_password": map[string]interface{}{"password": password, "link": "next_link"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
flowToken, err = s.getFlowToken(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// flow duplication check
|
||||||
|
data = map[string]interface{}{
|
||||||
|
"flow_token": flowToken,
|
||||||
|
"subtask_inputs": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"subtask_id": "AccountDuplicationCheck",
|
||||||
|
"check_logged_in_account": map[string]interface{}{"link": "AccountDuplicationCheck_false"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err = s.getFlowToken(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.isLogged = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logout resets the session: it clears the login state, the guest token and the cookie jar, and resets the bearer token.
|
||||||
|
func (s *Scraper) Logout() {
|
||||||
|
s.isLogged = false
|
||||||
|
s.guestToken = ""
|
||||||
|
s.client.Jar, _ = cookiejar.New(nil)
|
||||||
|
s.setBearerToken(bearerToken)
|
||||||
|
}
|
||||||
23
scraper.go
23
scraper.go
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/http/cookiejar"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
@ -21,11 +22,9 @@ type Scraper struct {
|
||||||
guestToken string
|
guestToken string
|
||||||
guestCreatedAt time.Time
|
guestCreatedAt time.Time
|
||||||
includeReplies bool
|
includeReplies bool
|
||||||
|
isLogged bool
|
||||||
searchMode SearchMode
|
searchMode SearchMode
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
|
||||||
cookie string
|
|
||||||
xCsrfToken string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchMode type
|
// SearchMode type
|
||||||
|
|
@ -51,9 +50,13 @@ var defaultScraper *Scraper
|
||||||
|
|
||||||
// New creates a Scraper object
|
// New creates a Scraper object
|
||||||
func New() *Scraper {
|
func New() *Scraper {
|
||||||
|
jar, _ := cookiejar.New(nil)
|
||||||
return &Scraper{
|
return &Scraper{
|
||||||
bearerToken: bearerToken,
|
bearerToken: bearerToken,
|
||||||
client: &http.Client{Timeout: DefaultClientTimeout},
|
client: &http.Client{
|
||||||
|
Jar: jar,
|
||||||
|
Timeout: DefaultClientTimeout,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,18 +103,6 @@ func WithReplies(b bool) *Scraper {
|
||||||
return defaultScraper.WithReplies(b)
|
return defaultScraper.WithReplies(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
// cookie
|
|
||||||
func (s *Scraper) WithCookie(cookie string) *Scraper {
|
|
||||||
s.cookie = cookie
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// x csrf token
|
|
||||||
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
|
|
||||||
s.xCsrfToken = xcsrfToken
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
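// WithClientTimeout sets the timeout of the scraper's HTTP client.
|
// WithClientTimeout sets the timeout of the scraper's HTTP client.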
|
||||||
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
|
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
|
||||||
s.client.Timeout = timeout
|
s.client.Timeout = timeout
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package twitterscraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"strconv"
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -27,6 +28,10 @@ func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-cha
|
||||||
|
|
||||||
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
||||||
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
|
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
|
||||||
|
if !s.isLogged {
|
||||||
|
return nil, errors.New("scraper is not logged in for search")
|
||||||
|
}
|
||||||
|
|
||||||
if maxNbr > 50 {
|
if maxNbr > 50 {
|
||||||
maxNbr = 50
|
maxNbr = 50
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,19 +2,32 @@ package twitterscraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
twitterscraper "github.com/n0madic/twitter-scraper"
|
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var searchScraper = twitterscraper.New()
|
||||||
|
|
||||||
|
func authSearchScraper() error {
|
||||||
|
if searchScraper.IsLoggedIn() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return searchScraper.Login(os.Getenv("TWITTER_USERNAME"), os.Getenv("TWITTER_PASSWORD"))
|
||||||
|
}
|
||||||
|
|
||||||
func TestFetchSearchCursor(t *testing.T) {
|
func TestFetchSearchCursor(t *testing.T) {
|
||||||
scraper := twitterscraper.New()
|
err := authSearchScraper()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
maxTweetsNbr := 150
|
maxTweetsNbr := 150
|
||||||
tweetsNbr := 0
|
tweetsNbr := 0
|
||||||
nextCursor := ""
|
nextCursor := ""
|
||||||
for tweetsNbr < maxTweetsNbr {
|
for tweetsNbr < maxTweetsNbr {
|
||||||
tweets, cursor, err := scraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
|
tweets, cursor, err := searchScraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
@ -33,8 +46,11 @@ func TestGetSearchProfiles(t *testing.T) {
|
||||||
count := 0
|
count := 0
|
||||||
maxProfilesNbr := 150
|
maxProfilesNbr := 150
|
||||||
dupcheck := make(map[string]bool)
|
dupcheck := make(map[string]bool)
|
||||||
scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
|
err := authSearchScraper()
|
||||||
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
for profile := range searchScraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
|
||||||
if profile.Error != nil {
|
if profile.Error != nil {
|
||||||
t.Error(profile.Error)
|
t.Error(profile.Error)
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -59,8 +75,11 @@ func TestGetSearchTweets(t *testing.T) {
|
||||||
count := 0
|
count := 0
|
||||||
maxTweetsNbr := 150
|
maxTweetsNbr := 150
|
||||||
dupcheck := make(map[string]bool)
|
dupcheck := make(map[string]bool)
|
||||||
scraper := twitterscraper.New().WithDelay(4)
|
err := authSearchScraper()
|
||||||
for tweet := range scraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
for tweet := range searchScraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
|
||||||
if tweet.Error != nil {
|
if tweet.Error != nil {
|
||||||
t.Error(tweet.Error)
|
t.Error(tweet.Error)
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@ package twitterscraper
|
||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
var bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
|
||||||
|
|
||||||
// GetTrends returns a list of trends.
|
// GetTrends returns a list of trends.
|
||||||
func (s *Scraper) GetTrends() ([]string, error) {
|
func (s *Scraper) GetTrends() ([]string, error) {
|
||||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")
|
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue