add cookie and x-csrf-token

This commit is contained in:
one.cat 2021-09-08 16:02:53 +08:00
parent d8dd7d54ea
commit 31c9e5da5a
5 changed files with 58 additions and 1 deletions

6
api.go
View file

@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
req.Header.Set("Authorization", "Bearer "+bearerToken) req.Header.Set("Authorization", "Bearer "+bearerToken)
req.Header.Set("X-Guest-Token", s.guestToken) req.Header.Set("X-Guest-Token", s.guestToken)
// Send the session cookie and CSRF token only when both are configured.
if len(s.Cookie) > 0 && len(s.XCsrfToken) > 0 {
req.Header.Set("Cookie", s.Cookie)
req.Header.Set("x-csrf-token", s.XCsrfToken)
}
resp, err := s.client.Do(req) resp, err := s.client.Do(req)
if err != nil { if err != nil {
return err return err

5
go.mod
View file

@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
go 1.13 go 1.13
require github.com/google/go-cmp v0.5.4 require (
github.com/google/go-cmp v0.5.4
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
)

7
go.sum
View file

@ -1,4 +1,11 @@
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View file

@ -3,6 +3,9 @@ package twitterscraper
import ( import (
"crypto/tls" "crypto/tls"
"errors" "errors"
"fmt"
"golang.org/x/net/proxy"
"log"
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
@ -20,6 +23,9 @@ type Scraper struct {
includeReplies bool includeReplies bool
searchMode SearchMode searchMode SearchMode
wg sync.WaitGroup wg sync.WaitGroup
Cookie string
XCsrfToken string
} }
// SearchMode type // SearchMode type
@ -80,6 +86,18 @@ func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b) return defaultScraper.WithReplies(b)
} }
// WithCookie stores cookie in s.Cookie and returns the same Scraper so calls
// can be chained. RequestAPI sends the value as the Cookie header only when
// XCsrfToken is non-empty as well.
func (s *Scraper) WithCookie(cookie string) *Scraper {
s.Cookie = cookie
return s
}
// WithXCsrfToken stores xcsrfToken in s.XCsrfToken and returns the same
// Scraper so calls can be chained. RequestAPI sends the value as the
// x-csrf-token header only when Cookie is non-empty as well.
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
s.XCsrfToken = xcsrfToken
return s
}
// SetProxy set http proxy in the format `http://HOST:PORT` // SetProxy set http proxy in the format `http://HOST:PORT`
func (s *Scraper) SetProxy(proxy string) error { func (s *Scraper) SetProxy(proxy string) error {
if !strings.HasPrefix(proxy, "http") { if !strings.HasPrefix(proxy, "http") {
@ -101,6 +119,27 @@ func (s *Scraper) SetProxy(proxy string) error {
return nil return nil
} }
// SetSocks5Proxy sets a SOCKS5 proxy in the format `HOST:PORT`.
//
// It replaces s.client with a new http.Client whose transport dials through
// the proxy. It returns an error if the SOCKS5 dialer cannot be constructed.
func (s *Scraper) SetSocks5Proxy(socks5 string) error {
	log.Println(socks5)

	// The forward dialer is what actually reaches the proxy; its timeout
	// bounds how long that connection attempt may take.
	forward := &net.Dialer{Timeout: 10 * time.Second}
	dialer, err := proxy.SOCKS5("tcp", socks5, nil, forward)
	if err != nil {
		return fmt.Errorf("can't connect to the socks5 proxy: %s, err: %w", socks5, err)
	}

	// Install exactly one dial hook. http.Transport prefers DialContext over
	// Dial when both are set, so also assigning a plain net.Dialer's
	// DialContext would silently bypass the proxy.
	transport := &http.Transport{}
	if ctxDialer, ok := dialer.(proxy.ContextDialer); ok {
		transport.DialContext = ctxDialer.DialContext
	} else {
		transport.Dial = dialer.Dial
	}

	s.client = &http.Client{Transport: transport}
	return nil
}
// SetProxy wrapper for default Scraper // SetProxy wrapper for default Scraper
func SetProxy(proxy string) error { func SetProxy(proxy string) error {
return defaultScraper.SetProxy(proxy) return defaultScraper.SetProxy(proxy)

View file

@ -3,6 +3,7 @@ package twitterscraper
import ( import (
"context" "context"
"fmt" "fmt"
"log"
"strconv" "strconv"
) )
@ -41,6 +42,7 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*
req.URL.RawQuery = q.Encode() req.URL.RawQuery = q.Encode()
var timeline timeline var timeline timeline
log.Println(req, timeline, "getuser tww")
err = s.RequestAPI(req, &timeline) err = s.RequestAPI(req, &timeline)
if err != nil { if err != nil {
return nil, "", err return nil, "", err