Merge pull request #43 from cute-angelia/master
add cookie and x-csrf-token & add proxy sock5
This commit is contained in:
commit
abc2678351
5 changed files with 106 additions and 20 deletions
21
README.md
21
README.md
|
|
@ -29,6 +29,12 @@ import (
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
scraper := twitterscraper.New()
|
scraper := twitterscraper.New()
|
||||||
|
|
||||||
|
// Cookie and xCsrfToken are optional
|
||||||
|
// Some users' tweets are protected; to read them you must log in and follow the user
|
||||||
|
scraper.WithCookie("twitter cookie after login")
|
||||||
|
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
|
||||||
|
|
||||||
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
|
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
|
||||||
if tweet.Error != nil {
|
if tweet.Error != nil {
|
||||||
panic(tweet.Error)
|
panic(tweet.Error)
|
||||||
|
|
@ -167,7 +173,11 @@ func main() {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use http proxy
|
### Use Proxy
|
||||||
|
|
||||||
|
Supports HTTP and SOCKS5 proxies.
|
||||||
|
|
||||||
|
#### with http
|
||||||
|
|
||||||
```golang
|
```golang
|
||||||
err := scraper.SetProxy("http://localhost:3128")
|
err := scraper.SetProxy("http://localhost:3128")
|
||||||
|
|
@ -176,6 +186,15 @@ if err != nil {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### with socks5
|
||||||
|
|
||||||
|
```golang
|
||||||
|
err := scraper.SetProxy("socks5://localhost:3128")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Delay requests
|
### Delay requests
|
||||||
|
|
||||||
Add delay between API requests (in seconds)
|
Add delay between API requests (in seconds)
|
||||||
|
|
|
||||||
6
api.go
6
api.go
|
|
@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||||
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
||||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||||
|
|
||||||
|
// send the cookie and CSRF token only when both are set
|
||||||
|
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
|
||||||
|
req.Header.Set("Cookie", s.cookie)
|
||||||
|
req.Header.Set("x-csrf-token", s.xCsrfToken)
|
||||||
|
}
|
||||||
|
|
||||||
resp, err := s.client.Do(req)
|
resp, err := s.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
||||||
5
go.mod
5
go.mod
|
|
@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
|
||||||
|
|
||||||
go 1.13
|
go 1.13
|
||||||
|
|
||||||
require github.com/google/go-cmp v0.5.4
|
require (
|
||||||
|
github.com/google/go-cmp v0.5.4
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
|
||||||
|
)
|
||||||
|
|
|
||||||
7
go.sum
7
go.sum
|
|
@ -1,4 +1,11 @@
|
||||||
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
|
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
|
||||||
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
|
|
||||||
87
scraper.go
87
scraper.go
|
|
@ -3,6 +3,7 @@ package twitterscraper
|
||||||
import (
|
import (
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"errors"
|
"errors"
|
||||||
|
"golang.org/x/net/proxy"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
@ -14,12 +15,16 @@ import (
|
||||||
// Scraper object
|
// Scraper object
|
||||||
type Scraper struct {
|
type Scraper struct {
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
clientTimeout time.Duration
|
||||||
delay int64
|
delay int64
|
||||||
guestToken string
|
guestToken string
|
||||||
guestCreatedAt time.Time
|
guestCreatedAt time.Time
|
||||||
includeReplies bool
|
includeReplies bool
|
||||||
searchMode SearchMode
|
searchMode SearchMode
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
|
||||||
|
cookie string
|
||||||
|
xCsrfToken string
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchMode type
|
// SearchMode type
|
||||||
|
|
@ -38,12 +43,16 @@ const (
|
||||||
SearchUsers
|
SearchUsers
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DefaultClientTimeout is the default HTTP client timeout
|
||||||
|
const DefaultClientTimeout = 10 * time.Second
|
||||||
|
|
||||||
var defaultScraper *Scraper
|
var defaultScraper *Scraper
|
||||||
|
|
||||||
// New creates a Scraper object
|
// New creates a Scraper object
|
||||||
func New() *Scraper {
|
func New() *Scraper {
|
||||||
return &Scraper{
|
return &Scraper{
|
||||||
client: &http.Client{Timeout: 10 * time.Second},
|
client: &http.Client{Timeout: DefaultClientTimeout},
|
||||||
|
clientTimeout: DefaultClientTimeout,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -80,25 +89,67 @@ func WithReplies(b bool) *Scraper {
|
||||||
return defaultScraper.WithReplies(b)
|
return defaultScraper.WithReplies(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetProxy set http proxy in the format `http://HOST:PORT`
|
// WithCookie stores the Twitter cookie; it is sent only when an X-Csrf-Token is also set
|
||||||
func (s *Scraper) SetProxy(proxy string) error {
|
func (s *Scraper) WithCookie(cookie string) *Scraper {
|
||||||
if !strings.HasPrefix(proxy, "http") {
|
s.cookie = cookie
|
||||||
return errors.New("only support http(s) protocol")
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithXCsrfToken stores the X-Csrf-Token; it is sent only when a cookie is also set
|
||||||
|
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
|
||||||
|
s.xCsrfToken = xcsrfToken
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithClientTimeout sets the dial timeout used when SetProxy builds the proxy client
|
||||||
|
func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper {
|
||||||
|
s.clientTimeout = timeout
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetProxy
|
||||||
|
// set http proxy in the format `http://HOST:PORT`
|
||||||
|
// set socks5 proxy in the format `socks5://HOST:PORT`
|
||||||
|
func (s *Scraper) SetProxy(proxyAddr string) error {
|
||||||
|
if strings.HasPrefix(proxyAddr, "http") {
|
||||||
|
urlproxy, err := url.Parse(proxyAddr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.client = &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
Proxy: http.ProxyURL(urlproxy),
|
||||||
|
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: s.clientTimeout,
|
||||||
|
}).DialContext,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
urlproxy, err := url.Parse(proxy)
|
if strings.HasPrefix(proxyAddr, "socks5") {
|
||||||
if err != nil {
|
baseDialer := &net.Dialer{
|
||||||
return err
|
Timeout: s.clientTimeout,
|
||||||
|
KeepAlive: s.clientTimeout,
|
||||||
|
}
|
||||||
|
socksHostPort := strings.ReplaceAll(proxyAddr, "socks5://", "")
|
||||||
|
dialSocksProxy, err := proxy.SOCKS5("tcp", socksHostPort, nil, baseDialer)
|
||||||
|
if err != nil {
|
||||||
|
return errors.New("error creating socks5 proxy :" + err.Error())
|
||||||
|
}
|
||||||
|
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
|
||||||
|
dialContext := contextDialer.DialContext
|
||||||
|
s.client = &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
DialContext: dialContext,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return errors.New("failed type assertion to DialContext")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
s.client = &http.Client{
|
return errors.New("only support http(s) or socks5 protocol")
|
||||||
Transport: &http.Transport{
|
|
||||||
Proxy: http.ProxyURL(urlproxy),
|
|
||||||
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
|
||||||
DialContext: (&net.Dialer{
|
|
||||||
Timeout: 10 * time.Second,
|
|
||||||
}).DialContext,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetProxy wrapper for default Scraper
|
// SetProxy wrapper for default Scraper
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue