69 lines
2.3 KiB
Go
69 lines
2.3 KiB
Go
|
|
package twitterscraper
|
||
|
|
|
||
|
|
import (
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||
|
|
|
||
|
|
// Profile holds the public data scraped from a Twitter user's profile page.
// All fields are filled in by GetProfile; string fields are empty and counters
// are zero when the corresponding element is missing from the page.
type Profile struct {
	// Avatar is the src attribute of the profile avatar image.
	Avatar string
	// Biography is the text of the profile bio element.
	Biography string
	// Birthday is the birthdate text with the "Born " prefix removed.
	Birthday string
	// FollowersCount is the numeric followers counter.
	FollowersCount int
	// FollowingCount is the numeric following counter.
	FollowingCount int
	// Joined is the join date parsed from the join-date element's title.
	Joined *time.Time
	// LikesCount is the numeric likes (favorites) counter.
	LikesCount int
	// Location is the whitespace-trimmed location text.
	Location string
	// Name is the display name taken from the profile name link.
	Name string
	// TweetsCount is the numeric tweets counter.
	TweetsCount int
	// URL is the address of the profile page that was scraped.
	URL string
	// Username is the screen name as shown on the page.
	Username string
	// Website is the whitespace-trimmed title of the profile's website link.
	Website string
}
|
||
|
|
|
||
|
|
// GetProfile return parsed user profile
|
||
|
|
func GetProfile(username string) (Profile, error) {
|
||
|
|
url := "https://twitter.com/" + username
|
||
|
|
doc, err := goquery.NewDocument(url)
|
||
|
|
if err != nil {
|
||
|
|
return Profile{}, err
|
||
|
|
}
|
||
|
|
|
||
|
|
// parse location, also check is username valid
|
||
|
|
location := strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text())
|
||
|
|
if location == "" {
|
||
|
|
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
|
||
|
|
}
|
||
|
|
|
||
|
|
// parse join date text
|
||
|
|
joined, _ := time.Parse("3:4 PM - 2 Jan 2006", doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", ""))
|
||
|
|
|
||
|
|
return Profile{
|
||
|
|
Avatar: doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""),
|
||
|
|
Biography: doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(),
|
||
|
|
Birthday: strings.ReplaceAll(strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text()), "Born ", ""),
|
||
|
|
FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()),
|
||
|
|
FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()),
|
||
|
|
Joined: &joined,
|
||
|
|
LikesCount: parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()),
|
||
|
|
Location: location,
|
||
|
|
Name: doc.Find(".ProfileHeaderCard-nameLink").First().Text(),
|
||
|
|
TweetsCount: parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()),
|
||
|
|
URL: url,
|
||
|
|
Username: doc.Find(".u-linkComplex-target").First().Text(),
|
||
|
|
Website: strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")),
|
||
|
|
}, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseCount(sel *goquery.Selection) (i int) {
|
||
|
|
if str, exists := sel.Attr("data-count"); exists {
|
||
|
|
i, _ = strconv.Atoi(str)
|
||
|
|
}
|
||
|
|
return
|
||
|
|
}
|