Merge pull request #10 from n0madic/fix-broken-api

Fix broken Twitter API
This commit is contained in:
Nomadic 2020-08-25 09:56:30 +03:00 committed by GitHub
commit c181d32eee
4 changed files with 56 additions and 56 deletions

View file

@ -33,12 +33,13 @@ type Profile struct {
// GetProfile returns the parsed user profile.
func GetProfile(username string) (Profile, error) {
url := "https://twitter.com/" + username
url := "https://mobile.twitter.com/" + username
req, err := newRequest(url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return Profile{}, err
}
req.Header.Set("Accept-Language", "en-US")
resp, err := http.DefaultClient.Do(req)
if resp == nil {
@ -55,41 +56,31 @@ func GetProfile(username string) (Profile, error) {
return Profile{}, err
}
// parse location
location := strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text())
// parse join date text
joined, _ := time.Parse("3:4 PM - 2 Jan 2006", doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", ""))
screenName := doc.Find(".screen-name").First().Text()
// check is username valid
if location == "" && joined.IsZero() {
if screenName == "" {
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
}
return Profile{
Avatar: doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""),
Banner: doc.Find(".ProfileCanopy-headerBg img").First().AttrOr("src", ""),
Biography: doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(),
Birthday: strings.ReplaceAll(strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text()), "Born ", ""),
FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()),
FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()),
IsPrivate: doc.Find(".ProfileHeaderCard-badges .Icon--protected").First().Text() != "",
IsVerified: doc.Find(".ProfileHeaderCard-badges .Icon--verified").First().Text() != "",
Joined: &joined,
LikesCount: parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()),
Location: location,
Name: doc.Find(".ProfileHeaderCard-nameLink").First().Text(),
TweetsCount: parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()),
URL: url,
UserID: doc.Find(".ProfileNav").First().AttrOr("data-user-id", ""),
Username: doc.Find(".u-linkComplex-target").First().Text(),
Website: strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")),
Avatar: doc.Find("td.avatar > img").First().AttrOr("src", ""),
Biography: strings.TrimSpace(doc.Find(".bio").First().Text()),
FollowersCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(3) > a > div.statnum").First().Text()),
FollowingCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(2) > a > div.statnum").First().Text()),
IsPrivate: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "protected"),
IsVerified: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "verified"),
Location: strings.TrimSpace(doc.Find(".location").First().Text()),
Name: strings.TrimSpace(doc.Find(".fullname").First().Text()),
TweetsCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(1) > div.statnum").First().Text()),
URL: "https://twitter.com/" + screenName,
Username: screenName,
Website: strings.TrimSpace(doc.Find("div.url > div > a").First().AttrOr("data-url", "")),
}, nil
}
// parseCount converts the text of a scraped counter such as "1,234" into an
// int.
//
// Surrounding whitespace and comma thousands separators are removed before the
// value is parsed as a base-10 integer. Text that still does not parse (empty
// input, non-numeric or abbreviated values) yields 0, so a counter that cannot
// be read is reported as zero rather than as an error.
func parseCount(str string) (i int) {
	// Element text taken from HTML may carry layout whitespace around the
	// digits; strconv.Atoi rejects that, so strip it first.
	cleaned := strings.ReplaceAll(strings.TrimSpace(str), ",", "")

	n, err := strconv.Atoi(cleaned)
	if err != nil {
		// Best effort by design: callers treat the count as optional data.
		return 0
	}
	return n
}

View file

@ -2,29 +2,28 @@ package twitterscraper
import (
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
func TestGetProfile(t *testing.T) {
loc := time.FixedZone("UTC", 0)
joined := time.Date(2007, 02, 20, 6, 35, 0, 0, loc)
// loc := time.FixedZone("UTC", 0)
// joined := time.Date(2007, 02, 20, 6, 35, 0, 0, loc)
sample := Profile{
Avatar: "https://pbs.twimg.com/profile_images/1288505768673603584/yTzT2JgU_400x400.png",
Banner: "https://pbs.twimg.com/profile_banners/783214/1596041768/1500x500",
Biography: "bye",
Birthday: "March 21",
Avatar: "https://pbs.twimg.com/profile_images/1293262551057420293/4QMAmava_normal.jpg",
// Banner: "https://pbs.twimg.com/profile_banners/783214/1596041768/1500x500",
Biography: "#BlackLivesMatter\n#BlackTransLivesMatter",
// Birthday: "March 21",
IsPrivate: false,
IsVerified: true,
Joined: &joined,
Location: "Space",
Name: "Twitter",
URL: "https://twitter.com/Twitter",
UserID: "783214",
Username: "Twitter",
Website: "https://about.twitter.com/",
// Joined: &joined,
Location: "everywhere",
Name: "Twitter",
URL: "https://twitter.com/Twitter",
// UserID: "783214",
Username: "Twitter",
Website: "about.twitter.com",
}
profile, err := GetProfile("Twitter")
@ -48,9 +47,9 @@ func TestGetProfile(t *testing.T) {
if profile.FollowingCount == 0 {
t.Error("Expected FollowingCount is greater than zero")
}
if profile.LikesCount == 0 {
t.Error("Expected LikesCount is greater than zero")
}
// if profile.LikesCount == 0 {
// t.Error("Expected LikesCount is greater than zero")
// }
if profile.TweetsCount == 0 {
t.Error("Expected TweetsCount is greater than zero")
}

View file

@ -1,33 +1,41 @@
package twitterscraper
import (
"fmt"
"net/http"
"strings"
"github.com/PuerkitoBio/goquery"
)
const trendsURL = "https://twitter.com/i/trends"
const trendsURL = "https://mobile.twitter.com/trends"
// GetTrends return list of trends.
func GetTrends() ([]string, error) {
req, err := newRequest(trendsURL)
req, err := http.NewRequest("GET", trendsURL, nil)
if err != nil {
return nil, err
}
req.Header.Set("Accept-Language", "en-US")
htm, err := getHTMLFromJSON(req, "module_html")
if err != nil {
resp, err := http.DefaultClient.Do(req)
if resp == nil {
return nil, err
}
defer resp.Body.Close()
doc, err := goquery.NewDocumentFromReader(htm)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("response status: %s", resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
}
var trends []string
doc.Find("li").Each(func(i int, s *goquery.Selection) {
if trend, ok := s.Attr("data-trend-name"); ok {
trends = append(trends, trend)
}
doc.Find("li.topic").Each(func(i int, s *goquery.Selection) {
trends = append(trends, strings.TrimSpace(s.Text()))
})
return trends, nil
}

View file

@ -61,7 +61,8 @@ func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Resul
default:
}
tweets, err := FetchTweets(user, lastTweetID)
query := fmt.Sprintf("(from:%s)", user)
tweets, err := FetchSearchTweets(query, lastTweetID)
if err != nil {
channel <- &Result{Error: err}
return
@ -80,7 +81,8 @@ func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Resul
}
if tweetsNbr < maxTweetsNbr {
lastTweetID = tweet.ID
lastId, _ := strconv.ParseInt(tweet.ID, 10, 64)
lastTweetID = strconv.FormatInt(lastId-1, 10)
channel <- &Result{Tweet: *tweet}
}
tweetsNbr++