diff --git a/README.md b/README.md
index fe67fcd..51c4ddc 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ You can use this library to get the text of any user's Tweets trivially.
 
 ## Usage
 
+### Get tweets
+
 ```golang
 package main
 
@@ -30,6 +32,25 @@ func main() {
 
 It appears you can ask for up to 25 pages of tweets reliably (~486 tweets).
 
+### Get profile
+
+```golang
+package main
+
+import (
+	"fmt"
+	twitterscraper "github.com/n0madic/twitter-scraper"
+)
+
+func main() {
+	profile, err := twitterscraper.GetProfile("kennethreitz")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Printf("%+v\n", profile)
+}
+```
+
 ## Installation
 
 ```shell
diff --git a/profile.go b/profile.go
new file mode 100644
index 0000000..fce3711
--- /dev/null
+++ b/profile.go
@@ -0,0 +1,107 @@
+package twitterscraper
+
+import (
+	"fmt"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// profileRequestTimeout bounds one profile page request, including reading the body.
+const profileRequestTimeout = 30 * time.Second
+
+// Profile holds the data scraped from a Twitter user's profile page.
+// A string field is empty and a counter is zero when the page does not
+// expose the corresponding element.
+type Profile struct {
+	Avatar         string
+	Biography      string
+	Birthday       string
+	FollowersCount int
+	FollowingCount int
+	Joined         *time.Time // nil when the join date is missing or cannot be parsed
+	LikesCount     int
+	Location       string
+	Name           string
+	TweetsCount    int
+	URL            string
+	Username       string
+	Website        string
+}
+
+// GetProfile downloads https://twitter.com/<username> and returns the profile
+// parsed from its HTML.
+//
+// It returns an error when the request fails, when the response status is not
+// 200 OK, or when the page contains no profile header. The returned Profile is
+// the zero value whenever the error is non-nil.
+func GetProfile(username string) (Profile, error) {
+	url := "https://twitter.com/" + username
+
+	client := &http.Client{Timeout: profileRequestTimeout}
+	resp, err := client.Get(url)
+	if err != nil {
+		return Profile{}, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return Profile{}, fmt.Errorf("fetching profile of @%s: unexpected status %s", username, resp.Status)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Use the name link, not the location, to decide whether this is a profile
+	// page: the location text can legitimately be empty for an existing user.
+	// NOTE(review): assumes every rendered profile has this element - confirm.
+	name := doc.Find(".ProfileHeaderCard-nameLink").First()
+	if name.Length() == 0 {
+		return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
+	}
+
+	// Leave Joined nil rather than pointing it at the zero time when the title
+	// attribute is absent or does not match the layout.
+	var joined *time.Time
+	joinTitle := doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", "")
+	if t, perr := time.Parse("3:4 PM - 2 Jan 2006", joinTitle); perr == nil {
+		joined = &t
+	}
+
+	birthday := strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text())
+
+	return Profile{
+		Avatar:         doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""),
+		Biography:      doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(),
+		Birthday:       strings.TrimPrefix(birthday, "Born "),
+		FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()),
+		FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()),
+		Joined:         joined,
+		LikesCount:     parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()),
+		Location:       strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text()),
+		Name:           name.Text(),
+		TweetsCount:    parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()),
+		URL:            url,
+		Username:       doc.Find(".u-linkComplex-target").First().Text(),
+		Website:        strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")),
+	}, nil
+}
+
+// parseCount returns the integer held in the data-count attribute of sel.
+// It returns 0 when the attribute is missing or is not a valid int.
+func parseCount(sel *goquery.Selection) int {
+	str, ok := sel.Attr("data-count")
+	if !ok {
+		return 0
+	}
+	n, err := strconv.Atoi(str)
+	if err != nil {
+		return 0
+	}
+	return n
+}