Merge pull request #43 from cute-angelia/master
add cookie and x-csrf-token & add proxy sock5
This commit is contained in:
commit
abc2678351
5 changed files with 106 additions and 20 deletions
21
README.md
21
README.md
|
|
@ -29,6 +29,12 @@ import (
|
|||
|
||||
func main() {
|
||||
scraper := twitterscraper.New()
|
||||
|
||||
// Cookie and xCsrfToken are optional
|
||||
// Some users' tweets are protected: you must log in and follow those users to read them
|
||||
scraper.WithCookie("twitter cookie after login")
|
||||
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
|
||||
|
||||
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
|
||||
if tweet.Error != nil {
|
||||
panic(tweet.Error)
|
||||
|
|
@ -167,7 +173,11 @@ func main() {
|
|||
}
|
||||
```
|
||||
|
||||
### Use http proxy
|
||||
### Use Proxy
|
||||
|
||||
Both HTTP and SOCKS5 proxies are supported.
|
||||
|
||||
#### with http
|
||||
|
||||
```golang
|
||||
err := scraper.SetProxy("http://localhost:3128")
|
||||
|
|
@ -176,6 +186,15 @@ if err != nil {
|
|||
}
|
||||
```
|
||||
|
||||
#### with socks5
|
||||
|
||||
```golang
|
||||
err := scraper.SetProxy("socks5://localhost:3128")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
```
|
||||
|
||||
### Delay requests
|
||||
|
||||
Add delay between API requests (in seconds)
|
||||
|
|
|
|||
6
api.go
6
api.go
|
|
@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
|||
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||
|
||||
// Send the login cookie and CSRF token only when both have been configured
|
||||
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
|
||||
req.Header.Set("Cookie", s.cookie)
|
||||
req.Header.Set("x-csrf-token", s.xCsrfToken)
|
||||
}
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
|||
5
go.mod
5
go.mod
|
|
@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
|
|||
|
||||
go 1.13
|
||||
|
||||
require github.com/google/go-cmp v0.5.4
|
||||
require (
|
||||
github.com/google/go-cmp v0.5.4
|
||||
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
|
||||
)
|
||||
|
|
|
|||
7
go.sum
7
go.sum
|
|
@ -1,4 +1,11 @@
|
|||
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
|
||||
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
|
||||
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
|
|
|||
87
scraper.go
87
scraper.go
|
|
@ -3,6 +3,7 @@ package twitterscraper
|
|||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"golang.org/x/net/proxy"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
|
@ -14,12 +15,16 @@ import (
|
|||
// Scraper object
|
||||
type Scraper struct {
|
||||
client *http.Client
|
||||
clientTimeout time.Duration
|
||||
delay int64
|
||||
guestToken string
|
||||
guestCreatedAt time.Time
|
||||
includeReplies bool
|
||||
searchMode SearchMode
|
||||
wg sync.WaitGroup
|
||||
|
||||
cookie string
|
||||
xCsrfToken string
|
||||
}
|
||||
|
||||
// SearchMode type
|
||||
|
|
@ -38,12 +43,16 @@ const (
|
|||
SearchUsers
|
||||
)
|
||||
|
||||
// DefaultClientTimeout is the default HTTP client timeout.
|
||||
const DefaultClientTimeout = 10 * time.Second
|
||||
|
||||
var defaultScraper *Scraper
|
||||
|
||||
// New creates a Scraper object
|
||||
func New() *Scraper {
|
||||
return &Scraper{
|
||||
client: &http.Client{Timeout: 10 * time.Second},
|
||||
client: &http.Client{Timeout: DefaultClientTimeout},
|
||||
clientTimeout: DefaultClientTimeout,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -80,25 +89,67 @@ func WithReplies(b bool) *Scraper {
|
|||
return defaultScraper.WithReplies(b)
|
||||
}
|
||||
|
||||
// SetProxy set http proxy in the format `http://HOST:PORT`
|
||||
func (s *Scraper) SetProxy(proxy string) error {
|
||||
if !strings.HasPrefix(proxy, "http") {
|
||||
return errors.New("only support http(s) protocol")
|
||||
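// WithCookie sets the Cookie header value sent with API requests; it is only applied when an X-Csrf-Token is also set.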
|
||||
func (s *Scraper) WithCookie(cookie string) *Scraper {
|
||||
s.cookie = cookie
|
||||
return s
|
||||
}
|
||||
|
||||
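// WithXCsrfToken sets the x-csrf-token header value sent with API requests; it is only applied when a cookie is also set.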
|
||||
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
|
||||
s.xCsrfToken = xcsrfToken
|
||||
return s
|
||||
}
|
||||
|
||||
// WithClientTimeout sets the dial timeout used by the clients that SetProxy creates; call it before SetProxy.
|
||||
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
|
||||
s.clientTimeout = timeout
|
||||
return s
|
||||
}
|
||||
|
||||
// SetProxy replaces the scraper's HTTP client with one that routes requests through the given proxy.
|
||||
// set http proxy in the format `http://HOST:PORT`
|
||||
// set socket proxy in the format `socks5://HOST:PORT`
|
||||
func (s *Scraper) SetProxy(proxyAddr string) error {
|
||||
if strings.HasPrefix(proxyAddr, "http") {
|
||||
urlproxy, err := url.Parse(proxyAddr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(urlproxy),
|
||||
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: s.clientTimeout,
|
||||
}).DialContext,
|
||||
},
|
||||
}
|
||||
return nil
|
||||
}
|
||||
urlproxy, err := url.Parse(proxy)
|
||||
if err != nil {
|
||||
return err
|
||||
if strings.HasPrefix(proxyAddr, "socks5") {
|
||||
baseDialer := &net.Dialer{
|
||||
Timeout: s.clientTimeout,
|
||||
KeepAlive: s.clientTimeout,
|
||||
}
|
||||
socksHostPort := strings.ReplaceAll(proxyAddr, "socks5://", "")
|
||||
dialSocksProxy, err := proxy.SOCKS5("tcp", socksHostPort, nil, baseDialer)
|
||||
if err != nil {
|
||||
return errors.New("error creating socks5 proxy :" + err.Error())
|
||||
}
|
||||
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
|
||||
dialContext := contextDialer.DialContext
|
||||
s.client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: dialContext,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
return errors.New("failed type assertion to DialContext")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
s.client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(urlproxy),
|
||||
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 10 * time.Second,
|
||||
}).DialContext,
|
||||
},
|
||||
}
|
||||
return nil
|
||||
return errors.New("only support http(s) or socks5 protocol")
|
||||
}
|
||||
|
||||
// SetProxy wrapper for default Scraper
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue