twitter-scrapper/profile.go

91 lines
2.8 KiB
Go
Raw Normal View History

2019-09-21 10:59:45 +03:00
package twitterscraper
import (
"fmt"
"net/http"
2019-09-21 10:59:45 +03:00
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Profile holds the public profile data of a Twitter user as scraped from the
// user's profile page by GetProfile.
type Profile struct {
	// Avatar is the src attribute of the profile avatar image.
	Avatar string
	// Biography is the text of the profile bio.
	Biography string
	// Birthday is the birthdate text with the "Born " prefix removed.
	Birthday string
	// FollowersCount is the data-count value of the followers nav item.
	FollowersCount int
	// FollowingCount is the data-count value of the following nav item.
	FollowingCount int
	// IsPrivate reports whether the protected badge icon was found with text.
	IsPrivate bool
	// IsVerified reports whether the verified badge icon was found with text.
	IsVerified bool
	// Joined is parsed from the title of the join-date element. GetProfile
	// always sets a non-nil pointer; it points at the zero time.Time when the
	// title is missing or does not match the expected layout.
	Joined *time.Time
	// LikesCount is the data-count value of the favorites nav item.
	LikesCount int
	// Location is the trimmed location text of the profile header.
	Location string
	// Name is the text of the profile name link.
	Name string
	// TweetsCount is the data-count value of the tweets nav item.
	TweetsCount int
	// URL is the profile page address that was requested.
	URL string
	// UserID is not populated by GetProfile.
	UserID string
	// Username is the text of the first ".u-linkComplex-target" element.
	Username string
	// Website is the trimmed title attribute of the profile URL link.
	Website string
}
// GetProfile return parsed user profile.
2019-09-21 10:59:45 +03:00
func GetProfile(username string) (Profile, error) {
url := "https://twitter.com/" + username
req, err := newRequest(url)
if err != nil {
return Profile{}, err
}
resp, err := http.DefaultClient.Do(req)
if resp == nil {
return Profile{}, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return Profile{}, fmt.Errorf("response status: %s", resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
2019-09-21 10:59:45 +03:00
if err != nil {
return Profile{}, err
}
// parse location, also check is username valid
location := strings.TrimSpace(doc.Find(".ProfileHeaderCard-locationText.u-dir").First().Text())
if location == "" {
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
}
// parse join date text
joined, _ := time.Parse("3:4 PM - 2 Jan 2006", doc.Find(".ProfileHeaderCard-joinDateText.u-dir").First().AttrOr("title", ""))
return Profile{
Avatar: doc.Find(".ProfileAvatar-image").First().AttrOr("src", ""),
Biography: doc.Find(".ProfileHeaderCard-bio.u-dir").First().Text(),
Birthday: strings.ReplaceAll(strings.TrimSpace(doc.Find(".ProfileHeaderCard-birthdateText.u-dir").First().Text()), "Born ", ""),
FollowersCount: parseCount(doc.Find(".ProfileNav-item--followers > a > span.ProfileNav-value").First()),
FollowingCount: parseCount(doc.Find(".ProfileNav-item--following > a > span.ProfileNav-value").First()),
IsPrivate: doc.Find(".ProfileHeaderCard-badges .Icon--protected").First().Text() != "",
IsVerified: doc.Find(".ProfileHeaderCard-badges .Icon--verified").First().Text() != "",
2019-09-21 10:59:45 +03:00
Joined: &joined,
LikesCount: parseCount(doc.Find(".ProfileNav-item--favorites > a > span.ProfileNav-value").First()),
Location: location,
Name: doc.Find(".ProfileHeaderCard-nameLink").First().Text(),
TweetsCount: parseCount(doc.Find(".ProfileNav-item--tweets.is-active > a > span.ProfileNav-value").First()),
URL: url,
Username: doc.Find(".u-linkComplex-target").First().Text(),
Website: strings.TrimSpace(doc.Find(".ProfileHeaderCard-urlText.u-dir > a").First().AttrOr("title", "")),
}, nil
}
// parseCount returns the integer held in the selection's "data-count"
// attribute, or 0 when the attribute is absent.
func parseCount(sel *goquery.Selection) int {
	raw, found := sel.Attr("data-count")
	if !found {
		return 0
	}
	// The conversion error is intentionally discarded; the result is whatever
	// strconv.Atoi yields for the attribute text.
	count, _ := strconv.Atoi(raw)
	return count
}