Merge pull request #43 from cute-angelia/master

Add cookie and X-Csrf-Token support; add SOCKS5 proxy support
This commit is contained in:
Nomadic 2021-09-10 17:27:35 +03:00 committed by GitHub
commit abc2678351
5 changed files with 106 additions and 20 deletions

View file

@ -29,6 +29,12 @@ import (
func main() { func main() {
scraper := twitterscraper.New() scraper := twitterscraper.New()
// Cookie and xCsrfToken are optional.
// Some users' tweets are protected: you must log in and follow them to read their tweets.
scraper.WithCookie("twitter cookie after login")
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) { for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
if tweet.Error != nil { if tweet.Error != nil {
panic(tweet.Error) panic(tweet.Error)
@ -167,7 +173,11 @@ func main() {
} }
``` ```
### Use http proxy ### Use Proxy
Both HTTP and SOCKS5 proxies are supported.
#### with http
```golang ```golang
err := scraper.SetProxy("http://localhost:3128") err := scraper.SetProxy("http://localhost:3128")
@ -176,6 +186,15 @@ if err != nil {
} }
``` ```
#### with socks5
```golang
err := scraper.SetProxy("socks5://localhost:3128")
if err != nil {
panic(err)
}
```
### Delay requests ### Delay requests
Add delay between API requests (in seconds) Add delay between API requests (in seconds)

6
api.go
View file

@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
req.Header.Set("Authorization", "Bearer "+bearerToken) req.Header.Set("Authorization", "Bearer "+bearerToken)
req.Header.Set("X-Guest-Token", s.guestToken) req.Header.Set("X-Guest-Token", s.guestToken)
// use cookie
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
req.Header.Set("Cookie", s.cookie)
req.Header.Set("x-csrf-token", s.xCsrfToken)
}
resp, err := s.client.Do(req) resp, err := s.client.Do(req)
if err != nil { if err != nil {
return err return err

5
go.mod
View file

@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
go 1.13 go 1.13
require github.com/google/go-cmp v0.5.4 require (
github.com/google/go-cmp v0.5.4
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
)

7
go.sum
View file

@ -1,4 +1,11 @@
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View file

@ -3,6 +3,7 @@ package twitterscraper
import ( import (
"crypto/tls" "crypto/tls"
"errors" "errors"
"golang.org/x/net/proxy"
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
@ -14,12 +15,16 @@ import (
// Scraper object // Scraper object
type Scraper struct { type Scraper struct {
client *http.Client client *http.Client
clientTimeout time.Duration
delay int64 delay int64
guestToken string guestToken string
guestCreatedAt time.Time guestCreatedAt time.Time
includeReplies bool includeReplies bool
searchMode SearchMode searchMode SearchMode
wg sync.WaitGroup wg sync.WaitGroup
cookie string
xCsrfToken string
} }
// SearchMode type // SearchMode type
@ -38,12 +43,16 @@ const (
SearchUsers SearchUsers
) )
// DefaultClientTimeout is the default HTTP client timeout.
const DefaultClientTimeout = 10 * time.Second
var defaultScraper *Scraper var defaultScraper *Scraper
// New creates a Scraper object // New creates a Scraper object
func New() *Scraper { func New() *Scraper {
return &Scraper{ return &Scraper{
client: &http.Client{Timeout: 10 * time.Second}, client: &http.Client{Timeout: DefaultClientTimeout},
clientTimeout: DefaultClientTimeout,
} }
} }
@ -80,25 +89,67 @@ func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b) return defaultScraper.WithReplies(b)
} }
// SetProxy set http proxy in the format `http://HOST:PORT` // cookie
func (s *Scraper) SetProxy(proxy string) error { func (s *Scraper) WithCookie(cookie string) *Scraper {
if !strings.HasPrefix(proxy, "http") { s.cookie = cookie
return errors.New("only support http(s) protocol") return s
}
// WithXCsrfToken stores the X-Csrf-Token value on the scraper and returns
// the scraper so calls can be chained.
// RequestAPI sends it as the x-csrf-token header only when a cookie is also set.
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
s.xCsrfToken = xcsrfToken
return s
}
// WithClientTimeout stores the timeout on the scraper and returns the scraper
// so calls can be chained.
// SetProxy reads this value as the dial timeout when it builds a proxied client.
// This method only updates the stored value; it does not touch the existing
// client, so call it before SetProxy.
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
s.clientTimeout = timeout
return s
}
// SetProxy replaces the scraper's HTTP client with one that uses the given proxy.
// HTTP proxy format: `http://HOST:PORT`
// SOCKS5 proxy format: `socks5://HOST:PORT`
func (s *Scraper) SetProxy(proxyAddr string) error {
if strings.HasPrefix(proxyAddr, "http") {
urlproxy, err := url.Parse(proxyAddr)
if err != nil {
return err
}
s.client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
DialContext: (&net.Dialer{
Timeout: s.clientTimeout,
}).DialContext,
},
}
return nil
} }
urlproxy, err := url.Parse(proxy) if strings.HasPrefix(proxyAddr, "socks5") {
if err != nil { baseDialer := &net.Dialer{
return err Timeout: s.clientTimeout,
KeepAlive: s.clientTimeout,
}
socksHostPort := strings.ReplaceAll(proxyAddr, "socks5://", "")
dialSocksProxy, err := proxy.SOCKS5("tcp", socksHostPort, nil, baseDialer)
if err != nil {
return errors.New("error creating socks5 proxy :" + err.Error())
}
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
dialContext := contextDialer.DialContext
s.client = &http.Client{
Transport: &http.Transport{
DialContext: dialContext,
},
}
} else {
return errors.New("failed type assertion to DialContext")
}
return nil
} }
s.client = &http.Client{ return errors.New("only support http(s) or socks5 protocol")
Transport: &http.Transport{
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
return nil
} }
// SetProxy wrapper for default Scraper // SetProxy wrapper for default Scraper