diff --git a/profile.go b/profile.go index 100434d..2efa234 100644 --- a/profile.go +++ b/profile.go @@ -33,12 +33,13 @@ type Profile struct { // GetProfile return parsed user profile. func GetProfile(username string) (Profile, error) { - url := "https://twitter.com/" + username + url := "https://mobile.twitter.com/" + username - req, err := newRequest(url) + req, err := http.NewRequest("GET", url, nil) if err != nil { return Profile{}, err } + req.Header.Set("Accept-Language", "en-US") resp, err := http.DefaultClient.Do(req) if resp == nil { @@ -55,41 +56,31 @@ func GetProfile(username string) (Profile, error) { return Profile{}, err } - // parse location - location := strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text()) - // parse join date text - joined, _ := time.Parse("3:4 PM - 2 Jan 2006", doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", "")) + screenName := doc.Find(".screen-name").First().Text() // check is username valid - if location == "" && joined.IsZero() { + if screenName == "" { return Profile{}, fmt.Errorf("either @%s does not exist or is private", username) } return Profile{ - Avatar: doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""), - Banner: doc.Find(".ProfileCanopy-headerBg img").First().AttrOr("src", ""), - Biography: doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(), - Birthday: strings.ReplaceAll(strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text()), "Born ", ""), - FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()), - FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()), - IsPrivate: doc.Find(".ProfileHeaderCard-badges .Icon--protected").First().Text() != "", - IsVerified: doc.Find(".ProfileHeaderCard-badges .Icon--verified").First().Text() != "", - Joined: &joined, - LikesCount: parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()), - Location: location, - Name: doc.Find(".ProfileHeaderCard-nameLink").First().Text(), - TweetsCount: parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()), - URL: url, - UserID: doc.Find(".ProfileNav").First().AttrOr("data-user-id", ""), - Username: doc.Find(".u-linkComplex-target").First().Text(), - Website: strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")), + Avatar: doc.Find("td.avatar > img").First().AttrOr("src", ""), + Biography: strings.TrimSpace(doc.Find(".bio").First().Text()), + FollowersCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(3) > a > div.statnum").First().Text()), + FollowingCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(2) > a > div.statnum").First().Text()), + IsPrivate: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "protected"), + IsVerified: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "verified"), + Location: strings.TrimSpace(doc.Find(".location").First().Text()), + Name: strings.TrimSpace(doc.Find(".fullname").First().Text()), + TweetsCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(1) > div.statnum").First().Text()), + URL: "https://twitter.com/" + screenName, + Username: screenName, + Website: strings.TrimSpace(doc.Find("div.url > div > a").First().AttrOr("data-url", "")), }, nil } -func parseCount(sel *goquery.Selection) (i int) { - if str, exists := sel.Attr("data-count"); exists { - i, _ = strconv.Atoi(str) - } +func parseCount(str string) (i int) { + i, _ = strconv.Atoi(strings.Replace(str, ",", "", -1)) return } diff --git a/profile_test.go b/profile_test.go index ce15c85..2b1af0c 100644 --- a/profile_test.go +++ b/profile_test.go @@ -2,29 +2,28 @@ package twitterscraper import ( "testing" - "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) func TestGetProfile(t *testing.T) { - loc := time.FixedZone("UTC", 0) - joined := time.Date(2007, 02, 20, 6, 35, 0, 0, loc) + // loc := time.FixedZone("UTC", 0) + // joined := time.Date(2007, 02, 20, 6, 35, 0, 0, loc) sample := Profile{ - Avatar: "https://pbs.twimg.com/profile_images/1288505768673603584/yTzT2JgU_400x400.png", - Banner: "https://pbs.twimg.com/profile_banners/783214/1596041768/1500x500", - Biography: "bye", - Birthday: "March 21", + Avatar: "https://pbs.twimg.com/profile_images/1293262551057420293/4QMAmava_normal.jpg", + // Banner: "https://pbs.twimg.com/profile_banners/783214/1596041768/1500x500", + Biography: "#BlackLivesMatter\n#BlackTransLivesMatter", + // Birthday: "March 21", IsPrivate: false, IsVerified: true, - Joined: &joined, - Location: "Space", - Name: "Twitter", - URL: "https://twitter.com/Twitter", - UserID: "783214", - Username: "Twitter", - Website: "https://about.twitter.com/", + // Joined: &joined, + Location: "everywhere", + Name: "Twitter", + URL: "https://twitter.com/Twitter", + // UserID: "783214", + Username: "Twitter", + Website: "about.twitter.com", } profile, err := GetProfile("Twitter") @@ -48,9 +47,9 @@ func TestGetProfile(t *testing.T) { if profile.FollowingCount == 0 { t.Error("Expected FollowingCount is greater than zero") } - if profile.LikesCount == 0 { - t.Error("Expected LikesCount is greater than zero") - } + // if profile.LikesCount == 0 { + // t.Error("Expected LikesCount is greater than zero") + // } if profile.TweetsCount == 0 { t.Error("Expected TweetsCount is greater than zero") }