twitter-scrapper/scraper.go

112 lines
2.3 KiB
Go
Raw Normal View History

2020-12-12 23:33:57 +02:00
package twitterscraper
import (
2020-12-13 00:04:34 +02:00
"crypto/tls"
2020-12-12 23:33:57 +02:00
"errors"
"net"
"net/http"
"net/url"
"strings"
"time"
)
// Scraper holds the HTTP client and the search/timeline options shared by
// the scraping methods. Create one with New; the package also keeps a default
// instance that the package-level wrapper functions operate on.
type Scraper struct {
	// client performs the HTTP requests; SetProxy replaces it.
	client *http.Client
	// guestToken is not written by any code visible in this file —
	// NOTE(review): presumably a cached API guest token; confirm against its users.
	guestToken string
	// includeReplies is set by WithReplies.
	includeReplies bool
	// liveSearch is set by SetSearchLive.
	liveSearch bool
	// resultFilter is "image", "video" or "" (no filter); see SetSearchPhotos
	// and SetSearchVideos.
	resultFilter string
}
2020-12-12 23:45:14 +02:00
// defaultScraper is the package-level Scraper used by the package-level
// wrapper functions (SetSearchLive, WithReplies, SetProxy, ...). It is
// assigned in init.
var defaultScraper *Scraper
2020-12-12 23:33:57 +02:00
// New creates a Scraper object
2020-12-12 23:45:14 +02:00
func New() *Scraper {
return &Scraper{
2020-12-12 23:33:57 +02:00
client: &http.Client{Timeout: 10 * time.Second},
}
}
// SetSearchLive enables or disables realtime (live) search by storing b in
// the scraper's liveSearch flag. It returns the receiver so calls can be
// chained.
func (s *Scraper) SetSearchLive(b bool) *Scraper {
s.liveSearch = b
return s
}
// SetSearchLive calls SetSearchLive on the package-level default scraper and
// returns that scraper.
func SetSearchLive(b bool) *Scraper {
return defaultScraper.SetSearchLive(b)
}
// SetSearchPhotos filter search for photos only
2020-12-21 19:42:07 +07:00
func (s *Scraper) SetSearchPhotos(srctype bool) *Scraper {
if srctype {
s.resultFilter = "image"
} else {
s.resultFilter = ""
}
return s
}
// SetSearchPhotos wrapper for default SetSearchPhotos
2020-12-21 19:42:07 +07:00
func SetSearchPhotos(srctype bool) *Scraper {
return defaultScraper.SetSearchPhotos(srctype)
}
// SetSearchVideos filter search for videos only
2020-12-21 19:42:07 +07:00
func (s *Scraper) SetSearchVideos(srctype bool) *Scraper {
if srctype {
s.resultFilter = "video"
} else {
s.resultFilter = ""
}
return s
}
// SetSearchVideos wrapper for default SetSearchVideos
2020-12-21 19:42:07 +07:00
func SetSearchVideos(srctype bool) *Scraper {
return defaultScraper.SetSearchVideos(srctype)
}
2020-12-12 23:33:57 +02:00
// WithReplies enables or disables loading tweet replies with the timeline by
// storing b in the scraper's includeReplies flag. It returns the receiver so
// calls can be chained.
func (s *Scraper) WithReplies(b bool) *Scraper {
s.includeReplies = b
return s
}
// WithReplies calls WithReplies on the package-level default scraper and
// returns that scraper.
func WithReplies(b bool) *Scraper {
return defaultScraper.WithReplies(b)
}
// SetProxy set http proxy in the format `http://HOST:PORT`
func (s *Scraper) SetProxy(proxy string) error {
2020-12-13 00:04:34 +02:00
if !strings.HasPrefix(proxy, "http") {
return errors.New("only support http(s) protocol")
2020-12-12 23:33:57 +02:00
}
urlproxy, err := url.Parse(proxy)
if err != nil {
return err
}
s.client = &http.Client{
Transport: &http.Transport{
2020-12-13 00:04:34 +02:00
Proxy: http.ProxyURL(urlproxy),
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
2020-12-12 23:33:57 +02:00
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
return nil
}
// SetProxy calls SetProxy on the package-level default scraper and returns
// its error.
func SetProxy(proxy string) error {
return defaultScraper.SetProxy(proxy)
}
// init creates the package-level default scraper with New so the
// package-level wrapper functions have an instance to operate on.
func init() {
defaultScraper = New()
}