Merge pull request #17 from JustHumanz/master

Add http proxy
This commit is contained in:
Nomadic 2020-12-04 14:29:25 +02:00 committed by GitHub
commit 1c582e142e
5 changed files with 85 additions and 3 deletions

View file

@ -62,7 +62,28 @@ func main() {
}
}
```
#### With HTTP proxy
```golang
package main
import (
"context"
"fmt"
twitterscraper "github.com/n0madic/twitter-scraper"
)
// main searches for tweets through an HTTP proxy and prints the HTML of each
// result, stopping at the first error.
func main() {
	// SetProxy reports an error for a proxy string it does not accept. If that
	// error is ignored, no proxy is configured and the requests are sent
	// directly, so fail loudly here instead.
	if err := twitterscraper.SetProxy("http://localhost:16379"); err != nil {
		panic(err)
	}
	for tweet := range twitterscraper.SearchTweets(context.Background(),
		"twitter scraper data -filter:retweets", 50) {
		if tweet.Error != nil {
			panic(tweet.Error)
		}
		fmt.Println(tweet.HTML)
	}
}
```
The search ends once 50 tweets have been collected.
See [Rules and filtering](https://developer.twitter.com/en/docs/tweets/rules-and-filtering/overview/standard-operators) to build standard queries.

View file

@ -2,6 +2,7 @@ package twitterscraper
import (
"fmt"
"net"
"net/http"
"strconv"
"strings"
@ -35,13 +36,25 @@ type Profile struct {
func GetProfile(username string) (Profile, error) {
url := "https://mobile.twitter.com/" + username
client := http.DefaultClient
if HTTPProxy != nil {
client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(HTTPProxy),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return Profile{}, err
}
req.Header.Set("Accept-Language", "en-US")
resp, err := http.DefaultClient.Do(req)
resp, err := client.Do(req)
if resp == nil {
return Profile{}, err
}

View file

@ -4,9 +4,11 @@ import (
"context"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
@ -64,6 +66,18 @@ func FetchSearchTweets(query, nextCursor string) ([]*Tweet, string, error) {
url = "https://mobile.twitter.com" + nextCursor
}
client := http.DefaultClient
if HTTPProxy != nil {
client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(HTTPProxy),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, "", err
@ -72,7 +86,7 @@ func FetchSearchTweets(query, nextCursor string) ([]*Tweet, string, error) {
req.Header.Set("Referer", "https://mobile.twitter.com/")
req.Header.Set("User-Agent", "Opera/9.80 (J2ME/MIDP; Opera Mini/5.1.21214/28.2725; U; ru) Presto/2.8.119 Version/11.10")
resp, err := http.DefaultClient.Do(req)
resp, err := client.Do(req)
if resp == nil {
return nil, "", err
}

View file

@ -2,8 +2,10 @@ package twitterscraper
import (
"fmt"
"net"
"net/http"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
@ -12,13 +14,25 @@ const trendsURL = "https://mobile.twitter.com/trends"
// GetTrends return list of trends.
func GetTrends() ([]string, error) {
client := http.DefaultClient
if HTTPProxy != nil {
client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(HTTPProxy),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
}
req, err := http.NewRequest("GET", trendsURL, nil)
if err != nil {
return nil, err
}
req.Header.Set("Accept-Language", "en-US")
resp, err := http.DefaultClient.Do(req)
resp, err := client.Do(req)
if resp == nil {
return nil, err
}

20
util.go
View file

@ -2,11 +2,31 @@ package twitterscraper
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
)
// HTTPProxy is the proxy URL configured through SetProxy. It is nil until
// SetProxy succeeds.
var HTTPProxy *url.URL

// SetProxy parses Proxy and stores it in HTTPProxy. The expected format is
// `http://HOST:PORT`.
//
// It returns an error, leaving HTTPProxy unchanged, when Proxy cannot be
// parsed as a URL, when its scheme is neither http nor https, or when it
// names no host.
func SetProxy(Proxy string) error {
	urlproxy, err := url.Parse(Proxy)
	if err != nil {
		return err
	}
	// Validate the parsed scheme rather than pattern-matching the raw string:
	// an unanchored "http.+" match also accepts inputs such as
	// "socks5://http.example.com:1080" or "httpfoo".
	if urlproxy.Scheme != "http" && urlproxy.Scheme != "https" {
		return errors.New("only support http protocol")
	}
	// "http://" or "http:foo" parse successfully but carry no host to dial.
	if urlproxy.Host == "" {
		return errors.New("proxy url must include a host")
	}
	HTTPProxy = urlproxy
	return nil
}
func newRequest(url string) (*http.Request, error) {
req, err := http.NewRequest("GET", url, nil)
if err != nil {