twitter-scrapper/scraper.go

159 lines
3.3 KiB
Go
Raw Normal View History

2020-12-12 23:33:57 +02:00
package twitterscraper
import (
2020-12-13 00:04:34 +02:00
"crypto/tls"
2020-12-12 23:33:57 +02:00
"errors"
2021-09-08 16:02:53 +08:00
"golang.org/x/net/proxy"
2020-12-12 23:33:57 +02:00
"net"
"net/http"
"net/url"
"strings"
"sync"
2020-12-12 23:33:57 +02:00
"time"
)
// Scraper object
type Scraper struct {
client *http.Client
delay int64
2020-12-12 23:33:57 +02:00
guestToken string
2021-01-05 14:21:08 +02:00
guestCreatedAt time.Time
2020-12-12 23:33:57 +02:00
includeReplies bool
2020-12-23 19:53:48 +02:00
searchMode SearchMode
wg sync.WaitGroup
2021-09-08 16:02:53 +08:00
Cookie string
XCsrfToken string
2020-12-12 23:33:57 +02:00
}
2020-12-23 19:53:48 +02:00
// SearchMode enumerates the search modes that can be selected with
// SetSearchMode. The zero value is SearchTop.
type SearchMode int

const (
	// SearchTop - default mode
	SearchTop SearchMode = iota
	// SearchLatest - live mode
	SearchLatest
	// SearchPhotos - image mode
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
	// SearchUsers - user mode
	SearchUsers
)
2020-12-12 23:45:14 +02:00
// defaultScraper is the package-level Scraper that the package-level wrapper
// functions (SetSearchMode, WithDelay, WithReplies, SetProxy) delegate to.
// It is created by init.
var defaultScraper *Scraper
2020-12-12 23:33:57 +02:00
// New creates a Scraper object
2020-12-12 23:45:14 +02:00
func New() *Scraper {
return &Scraper{
2020-12-12 23:33:57 +02:00
client: &http.Client{Timeout: 10 * time.Second},
}
}
2020-12-23 19:53:48 +02:00
// SetSearchMode selects the search mode stored on the scraper and returns
// the same scraper so calls can be chained.
func (s *Scraper) SetSearchMode(mode SearchMode) *Scraper {
	s.searchMode = mode
	return s
}
2020-12-23 19:53:48 +02:00
// SetSearchMode selects the search mode on the package-level default Scraper
// and returns that scraper.
func SetSearchMode(mode SearchMode) *Scraper {
	return defaultScraper.SetSearchMode(mode)
}
// WithDelay sets the delay between API requests, in seconds, and returns the
// same scraper so calls can be chained.
func (s *Scraper) WithDelay(seconds int64) *Scraper {
	s.delay = seconds
	return s
}
// WithDelay sets the request delay, in seconds, on the package-level default
// Scraper and returns that scraper.
func WithDelay(seconds int64) *Scraper {
	return defaultScraper.WithDelay(seconds)
}
2020-12-12 23:33:57 +02:00
// WithReplies enables (true) or disables (false) loading the timeline with
// tweet replies, and returns the same scraper so calls can be chained.
func (s *Scraper) WithReplies(b bool) *Scraper {
	s.includeReplies = b
	return s
}
// WithReplies enables or disables tweet replies on the package-level default
// Scraper and returns that scraper.
func WithReplies(b bool) *Scraper {
	return defaultScraper.WithReplies(b)
}
2021-09-08 16:02:53 +08:00
// WithCookie stores the given cookie string in the scraper's Cookie field and
// returns the same scraper so calls can be chained.
// NOTE(review): how the value is sent with requests is not visible here —
// presumably as the Cookie header; confirm against the request code.
func (s *Scraper) WithCookie(cookie string) *Scraper {
	s.Cookie = cookie
	return s
}
// WithXCsrfToken stores the given CSRF token in the scraper's XCsrfToken
// field and returns the same scraper so calls can be chained.
// NOTE(review): how the value is sent with requests is not visible here —
// presumably as the x-csrf-token header; confirm against the request code.
func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper {
	s.XCsrfToken = xcsrfToken
	return s
}
2020-12-12 23:33:57 +02:00
// SetProxy set http proxy in the format `http://HOST:PORT`
func (s *Scraper) SetProxy(proxy string) error {
2020-12-13 00:04:34 +02:00
if !strings.HasPrefix(proxy, "http") {
return errors.New("only support http(s) protocol")
2020-12-12 23:33:57 +02:00
}
urlproxy, err := url.Parse(proxy)
if err != nil {
return err
}
s.client = &http.Client{
Transport: &http.Transport{
2020-12-13 00:04:34 +02:00
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
2020-12-12 23:33:57 +02:00
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
return nil
}
2021-09-08 16:02:53 +08:00
// SetProxy set socks5 proxy in the format `HOST:PORT`
func (s *Scraper) SetSocks5Proxy(socks5 string) error {
2021-09-08 16:30:37 +08:00
baseDialer := &net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}
if socks5 != "" {
dialSocksProxy, err := proxy.SOCKS5("tcp", socks5, nil, baseDialer)
if err != nil {
return errors.New("Error creating SOCKS5 proxy")
}
if contextDialer, ok := dialSocksProxy.(proxy.ContextDialer); ok {
dialContext := contextDialer.DialContext
s.client = &http.Client{
Transport: &http.Transport{
DialContext: dialContext,
},
}
} else {
return errors.New("Failed type assertion to DialContext")
}
2021-09-08 16:02:53 +08:00
} else {
s.client = &http.Client{
Transport: &http.Transport{
2021-09-08 16:30:37 +08:00
DialContext: (baseDialer).DialContext,
2021-09-08 16:02:53 +08:00
},
}
}
return nil
}
2020-12-12 23:33:57 +02:00
// SetProxy sets the HTTP(S) proxy on the package-level default Scraper and
// returns the error reported by that scraper's SetProxy method.
func SetProxy(proxy string) error {
	return defaultScraper.SetProxy(proxy)
}
// init creates the package-level default Scraper that the package-level
// wrapper functions operate on.
func init() {
	defaultScraper = New()
}