Add authentication
Breaking changes: removed WithCookie and WithXCsrfToken
This commit is contained in:
parent
3d364abaac
commit
eb18988829
7 changed files with 236 additions and 33 deletions
19
README.md
19
README.md
|
|
@ -168,14 +168,25 @@ func main() {
|
|||
}
|
||||
```
|
||||
|
||||
### Use cookie authentication
|
||||
### Use authentication
|
||||
|
||||
Some users' tweets are protected, so you must log in and follow those users to read them.
|
||||
Cookie and xCsrfToken is optional.
|
||||
Logging in is also required to use search.
|
||||
|
||||
```golang
|
||||
scraper.WithCookie("twitter cookie after login")
|
||||
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
|
||||
err := scraper.Login("username", "password")
|
||||
```
|
||||
|
||||
Status of login can be checked with:
|
||||
|
||||
```golang
|
||||
scraper.IsLoggedIn()
|
||||
```
|
||||
|
||||
Logout (clear session):
|
||||
|
||||
```golang
|
||||
scraper.Logout()
|
||||
```
|
||||
|
||||
### Use Proxy
|
||||
|
|
|
|||
17
api.go
17
api.go
|
|
@ -33,10 +33,11 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
|||
req.Header.Set("Authorization", "Bearer "+s.bearerToken)
|
||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||
|
||||
// use cookie
|
||||
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
|
||||
req.Header.Set("Cookie", s.cookie)
|
||||
req.Header.Set("x-csrf-token", s.xCsrfToken)
|
||||
for _, cookie := range s.client.Jar.Cookies(req.URL) {
|
||||
if cookie.Name == "ct0" {
|
||||
req.Header.Set("X-CSRF-Token", cookie.Value)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
|
|
@ -55,7 +56,13 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
|||
s.guestToken = ""
|
||||
}
|
||||
|
||||
return json.NewDecoder(resp.Body).Decode(target)
|
||||
b, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// fmt.Println(string(b))
|
||||
|
||||
return json.Unmarshal(b, target)
|
||||
}
|
||||
|
||||
// GetGuestToken from Twitter API
|
||||
|
|
|
|||
172
auth.go
Normal file
172
auth.go
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
package twitterscraper
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
)
|
||||
|
||||
// Constants used by the login flow.
const (
	// loginURL is the endpoint every step of the login flow is POSTed to.
	loginURL = "https://api.twitter.com/1.1/onboarding/task.json"

	// bearerToken2 is the bearer token Login installs on the scraper before
	// it starts the login flow.
	bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)
|
||||
|
||||
// flow is the JSON body returned by the login flow endpoint, reduced to the
// fields the scraper reads.
type flow struct {
	// Errors lists the errors reported by the endpoint; a non-empty list
	// means the step was rejected.
	Errors []struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	} `json:"errors"`

	// FlowToken is the token to send with the next step of the flow.
	FlowToken string `json:"flow_token"`

	// Status is the value of the "status" field of the response.
	Status string `json:"status"`
}
|
||||
|
||||
func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) {
|
||||
headers := http.Header{
|
||||
"Authorization": []string{"Bearer " + s.bearerToken},
|
||||
"Content-Type": []string{"application/json"},
|
||||
"User-Agent": []string{"Mozilla/5.0 (Linux; Android 11; Nokia G20) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.88 Mobile Safari/537.36"},
|
||||
"X-Guest-Token": []string{s.guestToken},
|
||||
"X-Twitter-Auth-Type": []string{"OAuth2Client"},
|
||||
"X-Twitter-Active-User": []string{"yes"},
|
||||
"X-Twitter-Client-Language": []string{"en"},
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req, err := http.NewRequest("POST", loginURL, bytes.NewReader(jsonData))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header = headers
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var info flow
|
||||
err = json.NewDecoder(resp.Body).Decode(&info)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if len(info.Errors) > 0 {
|
||||
return "", fmt.Errorf("auth error (%d): %v", info.Errors[0].Code, info.Errors[0].Message)
|
||||
}
|
||||
|
||||
return info.FlowToken, nil
|
||||
}
|
||||
|
||||
// IsLoggedIn check if scraper logged in
|
||||
func (s *Scraper) IsLoggedIn() bool {
|
||||
return s.isLogged
|
||||
}
|
||||
|
||||
// Login to Twitter
|
||||
func (s *Scraper) Login(username string, password string) error {
|
||||
s.setBearerToken(bearerToken2)
|
||||
|
||||
err := s.GetGuestToken()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// flow start
|
||||
data := map[string]interface{}{
|
||||
"flow_name": "login",
|
||||
"input_flow_data": map[string]interface{}{
|
||||
"flow_context": map[string]interface{}{
|
||||
"debug_overrides": map[string]interface{}{},
|
||||
"start_location": map[string]interface{}{"location": "splash_screen"},
|
||||
},
|
||||
},
|
||||
}
|
||||
flowToken, err := s.getFlowToken(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// flow instrumentation step
|
||||
data = map[string]interface{}{
|
||||
"flow_token": flowToken,
|
||||
"subtask_inputs": []map[string]interface{}{
|
||||
{
|
||||
"subtask_id": "LoginJsInstrumentationSubtask",
|
||||
"js_instrumentation": map[string]interface{}{"response": "{}", "link": "next_link"},
|
||||
},
|
||||
},
|
||||
}
|
||||
flowToken, err = s.getFlowToken(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// flow username step
|
||||
data = map[string]interface{}{
|
||||
"flow_token": flowToken,
|
||||
"subtask_inputs": []map[string]interface{}{
|
||||
{
|
||||
"subtask_id": "LoginEnterUserIdentifierSSO",
|
||||
"settings_list": map[string]interface{}{
|
||||
"setting_responses": []map[string]interface{}{
|
||||
{
|
||||
"key": "user_identifier",
|
||||
"response_data": map[string]interface{}{"text_data": map[string]interface{}{"result": username}},
|
||||
},
|
||||
},
|
||||
"link": "next_link",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
flowToken, err = s.getFlowToken(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// flow password step
|
||||
data = map[string]interface{}{
|
||||
"flow_token": flowToken,
|
||||
"subtask_inputs": []map[string]interface{}{
|
||||
{
|
||||
"subtask_id": "LoginEnterPassword",
|
||||
"enter_password": map[string]interface{}{"password": password, "link": "next_link"},
|
||||
},
|
||||
},
|
||||
}
|
||||
flowToken, err = s.getFlowToken(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// flow duplication check
|
||||
data = map[string]interface{}{
|
||||
"flow_token": flowToken,
|
||||
"subtask_inputs": []map[string]interface{}{
|
||||
{
|
||||
"subtask_id": "AccountDuplicationCheck",
|
||||
"check_logged_in_account": map[string]interface{}{"link": "AccountDuplicationCheck_false"},
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err = s.getFlowToken(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.isLogged = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Logout is reset session
|
||||
func (s *Scraper) Logout() {
|
||||
s.isLogged = false
|
||||
s.guestToken = ""
|
||||
s.client.Jar, _ = cookiejar.New(nil)
|
||||
s.setBearerToken(bearerToken)
|
||||
}
|
||||
23
scraper.go
23
scraper.go
|
|
@ -5,6 +5,7 @@ import (
|
|||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
|
|
@ -21,11 +22,9 @@ type Scraper struct {
|
|||
guestToken string
|
||||
guestCreatedAt time.Time
|
||||
includeReplies bool
|
||||
isLogged bool
|
||||
searchMode SearchMode
|
||||
wg sync.WaitGroup
|
||||
|
||||
cookie string
|
||||
xCsrfToken string
|
||||
}
|
||||
|
||||
// SearchMode type
|
||||
|
|
@ -51,9 +50,13 @@ var defaultScraper *Scraper
|
|||
|
||||
// New creates a Scraper object
|
||||
func New() *Scraper {
|
||||
jar, _ := cookiejar.New(nil)
|
||||
return &Scraper{
|
||||
bearerToken: bearerToken,
|
||||
client: &http.Client{Timeout: DefaultClientTimeout},
|
||||
client: &http.Client{
|
||||
Jar: jar,
|
||||
Timeout: DefaultClientTimeout,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -100,18 +103,6 @@ func WithReplies(b bool) *Scraper {
|
|||
return defaultScraper.WithReplies(b)
|
||||
}
|
||||
|
||||
// cookie
|
||||
func (s *Scraper) WithCookie(cookie string) *Scraper {
|
||||
s.cookie = cookie
|
||||
return s
|
||||
}
|
||||
|
||||
// x csrf token
|
||||
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
|
||||
s.xCsrfToken = xcsrfToken
|
||||
return s
|
||||
}
|
||||
|
||||
// client timeout
|
||||
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
|
||||
s.client.Timeout = timeout
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package twitterscraper
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
|
|
@ -27,6 +28,10 @@ func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-cha
|
|||
|
||||
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
||||
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
|
||||
if !s.isLogged {
|
||||
return nil, errors.New("scraper is not logged in for search")
|
||||
}
|
||||
|
||||
if maxNbr > 50 {
|
||||
maxNbr = 50
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,19 +2,32 @@ package twitterscraper_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||
)
|
||||
|
||||
var searchScraper = twitterscraper.New()
|
||||
|
||||
func authSearchScraper() error {
|
||||
if searchScraper.IsLoggedIn() {
|
||||
return nil
|
||||
}
|
||||
return searchScraper.Login(os.Getenv("TWITTER_USERNAME"), os.Getenv("TWITTER_PASSWORD"))
|
||||
}
|
||||
|
||||
func TestFetchSearchCursor(t *testing.T) {
|
||||
scraper := twitterscraper.New()
|
||||
err := authSearchScraper()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
maxTweetsNbr := 150
|
||||
tweetsNbr := 0
|
||||
nextCursor := ""
|
||||
for tweetsNbr < maxTweetsNbr {
|
||||
tweets, cursor, err := scraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
|
||||
tweets, cursor, err := searchScraper.FetchSearchTweets("twitter", maxTweetsNbr, nextCursor)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
@ -33,8 +46,11 @@ func TestGetSearchProfiles(t *testing.T) {
|
|||
count := 0
|
||||
maxProfilesNbr := 150
|
||||
dupcheck := make(map[string]bool)
|
||||
scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
|
||||
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
|
||||
err := authSearchScraper()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for profile := range searchScraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
|
||||
if profile.Error != nil {
|
||||
t.Error(profile.Error)
|
||||
} else {
|
||||
|
|
@ -59,8 +75,11 @@ func TestGetSearchTweets(t *testing.T) {
|
|||
count := 0
|
||||
maxTweetsNbr := 150
|
||||
dupcheck := make(map[string]bool)
|
||||
scraper := twitterscraper.New().WithDelay(4)
|
||||
for tweet := range scraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
|
||||
err := authSearchScraper()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for tweet := range searchScraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
|
||||
if tweet.Error != nil {
|
||||
t.Error(tweet.Error)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package twitterscraper
|
|||
|
||||
import "fmt"
|
||||
|
||||
var bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||
|
||||
// GetTrends return list of trends.
|
||||
func (s *Scraper) GetTrends() ([]string, error) {
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue