2020-12-12 23:33:57 +02:00
|
|
|
package twitterscraper
|
|
|
|
|
|
|
|
|
|
import (
|
2020-12-13 00:04:34 +02:00
|
|
|
"crypto/tls"
|
2020-12-12 23:33:57 +02:00
|
|
|
"errors"
|
|
|
|
|
"net"
|
|
|
|
|
"net/http"
|
|
|
|
|
"net/url"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Scraper carries the HTTP client and the search/timeline options that the
// package's setter methods configure.
type Scraper struct {
	// client is the HTTP client owned by the scraper. New installs one with a
	// request timeout and SetProxy installs one that goes through a proxy.
	client *http.Client

	// guestToken is a token string kept on the scraper.
	// NOTE(review): presumably a guest session token; it is not read or
	// written in this part of the file - confirm against its users.
	guestToken string

	// includeReplies is set by WithReplies; liveSearch is set by SetSearchLive.
	includeReplies, liveSearch bool

	// resultFilter is "image", "video" or "" (no filter); it is set by
	// SetSearchPhotos and SetSearchVideos.
	resultFilter string
}
|
|
|
|
|
|
2020-12-12 23:45:14 +02:00
|
|
|
// defaultScraper is the package-level Scraper that the package-level wrapper
// functions (SetSearchLive, SetSearchPhotos, SetSearchVideos, WithReplies,
// SetProxy) delegate to. It is assigned in init.
var defaultScraper *Scraper
|
2020-12-12 23:33:57 +02:00
|
|
|
|
|
|
|
|
// New creates a Scraper object
|
2020-12-12 23:45:14 +02:00
|
|
|
func New() *Scraper {
|
|
|
|
|
return &Scraper{
|
2020-12-12 23:33:57 +02:00
|
|
|
client: &http.Client{Timeout: 10 * time.Second},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-20 00:20:27 +07:00
|
|
|
// SetSearchLive enable/disable realtime search
|
2020-12-23 19:08:17 +02:00
|
|
|
func (s *Scraper) SetSearchLive(b bool) *Scraper {
|
|
|
|
|
s.liveSearch = b
|
2020-12-20 00:20:27 +07:00
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetSearchLive wrapper for default SetSearchLive
|
2020-12-23 19:08:17 +02:00
|
|
|
func SetSearchLive(b bool) *Scraper {
|
|
|
|
|
return defaultScraper.SetSearchLive(b)
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetSearchPhotos filter search for photos only
|
2020-12-21 19:42:07 +07:00
|
|
|
func (s *Scraper) SetSearchPhotos(srctype bool) *Scraper {
|
2020-12-20 00:20:27 +07:00
|
|
|
if srctype {
|
2020-12-23 19:08:17 +02:00
|
|
|
s.resultFilter = "image"
|
|
|
|
|
} else {
|
|
|
|
|
s.resultFilter = ""
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetSearchPhotos wrapper for default SetSearchPhotos
|
2020-12-21 19:42:07 +07:00
|
|
|
func SetSearchPhotos(srctype bool) *Scraper {
|
|
|
|
|
return defaultScraper.SetSearchPhotos(srctype)
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetSearchVideos filter search for videos only
|
2020-12-21 19:42:07 +07:00
|
|
|
func (s *Scraper) SetSearchVideos(srctype bool) *Scraper {
|
2020-12-20 00:20:27 +07:00
|
|
|
if srctype {
|
2020-12-23 19:08:17 +02:00
|
|
|
s.resultFilter = "video"
|
|
|
|
|
} else {
|
|
|
|
|
s.resultFilter = ""
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetSearchVideos wrapper for default SetSearchVideos
|
2020-12-21 19:42:07 +07:00
|
|
|
func SetSearchVideos(srctype bool) *Scraper {
|
|
|
|
|
return defaultScraper.SetSearchVideos(srctype)
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// WithReplies enable/disable load timeline with tweet replies
|
|
|
|
|
func (s *Scraper) WithReplies(b bool) *Scraper {
|
|
|
|
|
s.includeReplies = b
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// WithReplies wrapper for default Scraper
|
|
|
|
|
func WithReplies(b bool) *Scraper {
|
|
|
|
|
return defaultScraper.WithReplies(b)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetProxy set http proxy in the format `http://HOST:PORT`
|
|
|
|
|
func (s *Scraper) SetProxy(proxy string) error {
|
2020-12-13 00:04:34 +02:00
|
|
|
if !strings.HasPrefix(proxy, "http") {
|
|
|
|
|
return errors.New("only support http(s) protocol")
|
2020-12-12 23:33:57 +02:00
|
|
|
}
|
|
|
|
|
urlproxy, err := url.Parse(proxy)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
s.client = &http.Client{
|
|
|
|
|
Transport: &http.Transport{
|
2020-12-13 00:04:34 +02:00
|
|
|
Proxy: http.ProxyURL(urlproxy),
|
|
|
|
|
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
|
2020-12-12 23:33:57 +02:00
|
|
|
DialContext: (&net.Dialer{
|
|
|
|
|
Timeout: 10 * time.Second,
|
|
|
|
|
}).DialContext,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SetProxy wrapper for default Scraper
|
|
|
|
|
func SetProxy(proxy string) error {
|
|
|
|
|
return defaultScraper.SetProxy(proxy)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
defaultScraper = New()
|
|
|
|
|
}
|