twitter-scrapper/profile.go

96 lines
3 KiB
Go
Raw Normal View History

2019-09-21 10:59:45 +03:00
package twitterscraper
import (
"fmt"
"net/http"
2019-09-21 10:59:45 +03:00
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Profile holds the data scraped from a Twitter user's public profile page.
// GetProfile fills every field from the page HTML; a field whose element is
// missing from the page is left at its zero value.
type Profile struct {
	Avatar         string     // src attribute of the profile avatar image
	Banner         string     // src attribute of the header banner image
	Biography      string     // text of the profile bio element
	Birthday       string     // birthdate text with the "Born " prefix removed
	FollowersCount int        // followers counter from the profile navigation
	FollowingCount int        // following counter from the profile navigation
	IsPrivate      bool       // set when the protected badge icon has non-empty text
	IsVerified     bool       // set when the verified badge icon has non-empty text
	Joined         *time.Time // join date; GetProfile stores the zero time when it cannot parse one
	LikesCount     int        // favorites counter from the profile navigation
	Location       string     // trimmed location text
	Name           string     // text of the profile name link
	TweetsCount    int        // tweets counter from the profile navigation
	URL            string     // profile page URL that was requested
	UserID         string     // data-user-id attribute of the profile navigation
	Username       string     // text of the first .u-linkComplex-target element
	Website        string     // trimmed title attribute of the profile website link
}
// GetProfile return parsed user profile.
2019-09-21 10:59:45 +03:00
func GetProfile(username string) (Profile, error) {
url := "https://twitter.com/" + username
req, err := newRequest(url)
if err != nil {
return Profile{}, err
}
resp, err := http.DefaultClient.Do(req)
if resp == nil {
return Profile{}, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return Profile{}, fmt.Errorf("response status: %s", resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
2019-09-21 10:59:45 +03:00
if err != nil {
return Profile{}, err
}
2020-08-10 14:08:35 +03:00
// parse location
2019-09-21 10:59:45 +03:00
location := strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text())
// parse join date text
joined, _ := time.Parse("3:4 PM - 2 Jan 2006", doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", ""))
2020-08-10 14:08:35 +03:00
// check is username valid
if location == "" && joined.IsZero() {
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
}
2019-09-21 10:59:45 +03:00
return Profile{
Avatar: doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""),
2020-06-15 15:17:08 +03:00
Banner: doc.Find(".ProfileCanopy-headerBg img").First().AttrOr("src", ""),
2019-09-21 10:59:45 +03:00
Biography: doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(),
Birthday: strings.ReplaceAll(strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text()), "Born ", ""),
FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()),
FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()),
IsPrivate: doc.Find(".ProfileHeaderCard-badges .Icon--protected").First().Text() != "",
IsVerified: doc.Find(".ProfileHeaderCard-badges .Icon--verified").First().Text() != "",
2019-09-21 10:59:45 +03:00
Joined: &joined,
LikesCount: parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()),
Location: location,
Name: doc.Find(".ProfileHeaderCard-nameLink").First().Text(),
TweetsCount: parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()),
URL: url,
2020-06-15 15:06:07 +03:00
UserID: doc.Find(".ProfileNav").First().AttrOr("data-user-id", ""),
2019-09-21 10:59:45 +03:00
Username: doc.Find(".u-linkComplex-target").First().Text(),
Website: strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")),
}, nil
}
// parseCount reads the "data-count" attribute of sel and converts it to an
// int. It returns 0 when the attribute is absent.
func parseCount(sel *goquery.Selection) (i int) {
	raw, found := sel.Attr("data-count")
	if !found {
		return 0
	}

	// The conversion error is intentionally discarded: whatever value
	// strconv.Atoi produced is passed through to the caller unchanged.
	i, _ = strconv.Atoi(raw)
	return i
}