Merge pull request #43 from cute-angelia/master

add cookie and x-csrf-token & add proxy sock5
This commit is contained in:
Nomadic 2021-09-10 17:27:35 +03:00 committed by GitHub
commit abc2678351
5 changed files with 106 additions and 20 deletions

View file

@ -29,6 +29,12 @@ import (
func main() {
scraper := twitterscraper.New()
// Cookie and xCsrfToken are optional.
// Some users' tweets are protected: you must log in and follow them to read their tweets.
scraper.WithCookie("twitter cookie after login")
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
if tweet.Error != nil {
panic(tweet.Error)
@ -167,7 +173,11 @@ func main() {
}
```
### Use http proxy
### Use Proxy
HTTP and SOCKS5 proxies are supported.
#### with http
```golang
err := scraper.SetProxy("http://localhost:3128")
@ -176,6 +186,15 @@ if err != nil {
}
```
#### with socks5
```golang
err := scraper.SetProxy("socks5://localhost:3128")
if err != nil {
panic(err)
}
```
### Delay requests
Add delay between API requests (in seconds)

6
api.go
View file

@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
req.Header.Set("Authorization", "Bearer "+bearerToken)
req.Header.Set("X-Guest-Token", s.guestToken)
// use cookie
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
req.Header.Set("Cookie", s.cookie)
req.Header.Set("x-csrf-token", s.xCsrfToken)
}
resp, err := s.client.Do(req)
if err != nil {
return err

5
go.mod
View file

@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
go 1.13
require github.com/google/go-cmp v0.5.4
require (
github.com/google/go-cmp v0.5.4
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
)

7
go.sum
View file

@ -1,4 +1,11 @@
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View file

@ -3,6 +3,7 @@ package twitterscraper
import (
"crypto/tls"
"errors"
"golang.org/x/net/proxy"
"net"
"net/http"
"net/url"
@ -14,12 +15,16 @@ import (
// Scraper holds the HTTP client and the per-instance settings used when
// issuing requests to the Twitter API.
type Scraper struct {
client *http.Client // HTTP client used to perform API requests; replaced by SetProxy
clientTimeout time.Duration // timeout remembered for dialers built by SetProxy; New sets it to DefaultClientTimeout
delay int64 // presumably the delay between API requests in seconds (per README) — usage not visible here
guestToken string // sent as the X-Guest-Token header by RequestAPI
guestCreatedAt time.Time // presumably when guestToken was obtained — TODO confirm against the token-refresh code
includeReplies bool // NOTE(review): looks like the flag toggled by WithReplies — confirm
searchMode SearchMode // search mode selector (see the SearchMode constants)
wg sync.WaitGroup // NOTE(review): usage not visible in this view
cookie string // raw Cookie header value; only sent when xCsrfToken is also non-empty
xCsrfToken string // x-csrf-token header value; only sent when cookie is also non-empty
}
// SearchMode type
@ -38,12 +43,16 @@ const (
SearchUsers
)
// DefaultClientTimeout is the default HTTP client timeout. New uses it both
// for the client's Timeout and for the scraper's stored clientTimeout.
const DefaultClientTimeout = 10 * time.Second
var defaultScraper *Scraper
// New creates a Scraper whose HTTP client times out after
// DefaultClientTimeout. The same duration is stored in clientTimeout so that
// a client built later by SetProxy can reuse it.
func New() *Scraper {
	return &Scraper{
		// The timeout comes from the named constant rather than a repeated
		// literal; the key must appear only once in the composite literal.
		client:        &http.Client{Timeout: DefaultClientTimeout},
		clientTimeout: DefaultClientTimeout,
	}
}
@ -80,25 +89,67 @@ func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b)
}
// SetProxy set http proxy in the format `http://HOST:PORT`
func (s *Scraper) SetProxy(proxy string) error {
if !strings.HasPrefix(proxy, "http") {
return errors.New("only support http(s) protocol")
// WithCookie stores the raw Cookie header value (for example, the cookie of a
// logged-in session) and returns the scraper so calls can be chained.
// RequestAPI only sends it when an X-Csrf-Token has also been set with
// WithXCsrfToken.
func (s *Scraper) WithCookie(cookie string) *Scraper {
s.cookie = cookie
return s
}
// WithXCsrfToken stores the value sent in the x-csrf-token request header and
// returns the scraper so calls can be chained. RequestAPI only sends it when
// a cookie has also been set with WithCookie.
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
s.xCsrfToken = xcsrfToken
return s
}
// WithClientTimeout sets the HTTP client timeout and returns the scraper so
// calls can be chained.
//
// The value is applied to the current client immediately and is also
// remembered in clientTimeout, which SetProxy uses when it builds a new
// client.
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
	s.clientTimeout = timeout
	// Storing the value alone would leave the already-constructed client on
	// its previous timeout, so update it in place as well. The nil check keeps
	// a zero-value Scraper (not built by New) from panicking.
	if s.client != nil {
		s.client.Timeout = timeout
	}
	return s
}
// SetProxy
// set http proxy in the format `http://HOST:PORT`
// set socket proxy in the format `socks5://HOST:PORT`
func (s *Scraper) SetProxy(proxyAddr string) error {
if strings.HasPrefix(proxyAddr, "http") {
urlproxy, err := url.Parse(proxyAddr)
if err != nil {
return err
}
s.client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
DialContext: (&net.Dialer{
Timeout: s.clientTimeout,
}).DialContext,
},
}
return nil
}
urlproxy, err := url.Parse(proxy)
if err != nil {
return err
if strings.HasPrefix(proxyAddr, "socks5") {
baseDialer := &net.Dialer{
Timeout: s.clientTimeout,
KeepAlive: s.clientTimeout,
}
socksHostPort := strings.ReplaceAll(proxyAddr, "socks5://", "")
dialSocksProxy, err := proxy.SOCKS5("tcp", socksHostPort, nil, baseDialer)
if err != nil {
return errors.New("error creating socks5 proxy :" + err.Error())
}
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
dialContext := contextDialer.DialContext
s.client = &http.Client{
Transport: &http.Transport{
DialContext: dialContext,
},
}
} else {
return errors.New("failed type assertion to DialContext")
}
return nil
}
s.client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
return nil
return errors.New("only support http(s) or socks5 protocol")
}
// SetProxy wrapper for default Scraper