add cookie and x-csrf-token
This commit is contained in:
parent
d8dd7d54ea
commit
31c9e5da5a
5 changed files with 58 additions and 1 deletions
6
api.go
6
api.go
|
|
@ -33,6 +33,12 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||||
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
||||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||||
|
|
||||||
|
// Send the caller-supplied Cookie and x-csrf-token headers, but only when both are set.
|
||||||
|
if len(s.Cookie) > 0 && len(s.XCsrfToken) > 0 {
|
||||||
|
req.Header.Set("Cookie", s.Cookie)
|
||||||
|
req.Header.Set("x-csrf-token", s.XCsrfToken)
|
||||||
|
}
|
||||||
|
|
||||||
resp, err := s.client.Do(req)
|
resp, err := s.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
||||||
5
go.mod
5
go.mod
|
|
@ -2,4 +2,7 @@ module github.com/n0madic/twitter-scraper
|
||||||
|
|
||||||
go 1.13
|
go 1.13
|
||||||
|
|
||||||
require github.com/google/go-cmp v0.5.4
|
require (
|
||||||
|
github.com/google/go-cmp v0.5.4
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d
|
||||||
|
)
|
||||||
|
|
|
||||||
7
go.sum
7
go.sum
|
|
@ -1,4 +1,11 @@
|
||||||
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
|
github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
|
||||||
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d h1:20cMwl2fHAzkJMEA+8J4JgqBQcQGzbisXo31MIeenXI=
|
||||||
|
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
|
|
||||||
39
scraper.go
39
scraper.go
|
|
@ -3,6 +3,9 @@ package twitterscraper
|
||||||
import (
|
import (
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"golang.org/x/net/proxy"
|
||||||
|
"log"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
@ -20,6 +23,9 @@ type Scraper struct {
|
||||||
includeReplies bool
|
includeReplies bool
|
||||||
searchMode SearchMode
|
searchMode SearchMode
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
|
||||||
|
Cookie string
|
||||||
|
XCsrfToken string
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchMode type
|
// SearchMode type
|
||||||
|
|
@ -80,6 +86,18 @@ func WithReplies(b bool) *Scraper {
|
||||||
return defaultScraper.WithReplies(b)
|
return defaultScraper.WithReplies(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cookie
|
||||||
|
func (s *Scraper) WithCookie(cookie string) *Scraper {
|
||||||
|
s.Cookie = cookie
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// x csrf token
|
||||||
|
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
|
||||||
|
s.XCsrfToken = xcsrfToken
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
// SetProxy set http proxy in the format `http://HOST:PORT`
|
// SetProxy set http proxy in the format `http://HOST:PORT`
|
||||||
func (s *Scraper) SetProxy(proxy string) error {
|
func (s *Scraper) SetProxy(proxy string) error {
|
||||||
if !strings.HasPrefix(proxy, "http") {
|
if !strings.HasPrefix(proxy, "http") {
|
||||||
|
|
@ -101,6 +119,27 @@ func (s *Scraper) SetProxy(proxy string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetProxy set socks5 proxy in the format `HOST:PORT`
|
||||||
|
func (s *Scraper) SetSocks5Proxy(socks5 string) error {
|
||||||
|
log.Println(socks5)
|
||||||
|
if dialer, err := proxy.SOCKS5("tcp", socks5, nil, proxy.Direct); err != nil {
|
||||||
|
return errors.New(fmt.Sprintf("can't connect to the socks5 proxy: %s, err: %s", socks5, err.Error()))
|
||||||
|
} else {
|
||||||
|
s.client = &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
Dial: dialer.Dial,
|
||||||
|
// TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: 10 * time.Second,
|
||||||
|
}).DialContext,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println(s.client)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// SetProxy wrapper for default Scraper
|
// SetProxy wrapper for default Scraper
|
||||||
func SetProxy(proxy string) error {
|
func SetProxy(proxy string) error {
|
||||||
return defaultScraper.SetProxy(proxy)
|
return defaultScraper.SetProxy(proxy)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package twitterscraper
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"strconv"
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -41,6 +42,7 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*
|
||||||
req.URL.RawQuery = q.Encode()
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
var timeline timeline
|
var timeline timeline
|
||||||
|
log.Println(req, timeline, "getuser tww")
|
||||||
err = s.RequestAPI(req, &timeline)
|
err = s.RequestAPI(req, &timeline)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, "", err
|
return nil, "", err
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue