Total refactoring

Used guest frontend API
BREAKING CHANGE: remove tweet.HTML property
Loading more information
Minor fixes and changes
This commit is contained in:
Alexander Sheiko 2020-12-11 20:58:49 +02:00
parent 1c582e142e
commit edad8f6393
15 changed files with 628 additions and 497 deletions

View file

@ -1,55 +1,29 @@
package twitterscraper
import (
"fmt"
"net"
"net/http"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
const trendsURL = "https://mobile.twitter.com/trends"
// GetTrends return list of trends.
func GetTrends() ([]string, error) {
client := http.DefaultClient
if HTTPProxy != nil {
client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(HTTPProxy),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
}
req, err := http.NewRequest("GET", trendsURL, nil)
req, err := newRequest("GET", "https://twitter.com/i/api/2/guide.json")
if err != nil {
return nil, err
}
req.Header.Set("Accept-Language", "en-US")
resp, err := client.Do(req)
if resp == nil {
return nil, err
}
defer resp.Body.Close()
q := req.URL.Query()
q.Add("count", "20")
q.Add("candidate_source", "trends")
q.Add("include_page_configuration", "false")
q.Add("entity_tokens", "false")
req.URL.RawQuery = q.Encode()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("response status: %s", resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
var jsn timeline
err = requestAPI(req, &jsn)
if err != nil {
return nil, err
}
var trends []string
doc.Find("li.topic").Each(func(i int, s *goquery.Selection) {
trends = append(trends, strings.TrimSpace(s.Text()))
})
for _, item := range jsn.Timeline.Instructions[1].AddEntries.Entries[1].Content.TimelineModule.Items {
trends = append(trends, item.Item.ClientEventInfo.Details.GuideDetails.TransparentGuideDetails.TrendMetadata.TrendName)
}
return trends, nil
}