twitter-scrapper/profile.go

100 lines
2.8 KiB
Go
Raw Normal View History

2019-09-21 10:59:45 +03:00
package twitterscraper
import (
"fmt"
2020-12-03 21:42:16 +07:00
"net"
"net/http"
2019-09-21 10:59:45 +03:00
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Profile holds the public information scraped for a single Twitter user.
//
// GetProfile fills in only part of the struct; the fields it leaves at their
// zero value are marked below.
type Profile struct {
	// Avatar is the "src" attribute of the avatar image.
	Avatar string

	// Banner is not set by GetProfile.
	Banner string

	// Biography is the whitespace-trimmed text of the bio element.
	Biography string

	// Birthday is not set by GetProfile.
	Birthday string

	// FollowersCount is the follower counter shown in the profile stats.
	FollowersCount int

	// FollowingCount is the following counter shown in the profile stats.
	FollowingCount int

	// IsPrivate reports whether the badge image URL contains "protected".
	IsPrivate bool

	// IsVerified reports whether the badge image URL contains "verified".
	IsVerified bool

	// Joined is not set by GetProfile and is nil in that case.
	Joined *time.Time

	// LikesCount is not set by GetProfile.
	LikesCount int

	// Location is the whitespace-trimmed text of the location element.
	Location string

	// Name is the whitespace-trimmed text of the full-name element.
	Name string

	// TweetsCount is the tweet counter shown in the profile stats.
	TweetsCount int

	// URL is "https://twitter.com/" followed by Username.
	URL string

	// UserID is not set by GetProfile.
	UserID string

	// Username is the screen name as printed on the profile page.
	Username string

	// Website is the "data-url" attribute of the profile's link, trimmed.
	Website string
}
// GetProfile return parsed user profile.
2019-09-21 10:59:45 +03:00
func GetProfile(username string) (Profile, error) {
url := "https://mobile.twitter.com/" + username
2020-12-03 21:42:16 +07:00
client := http.DefaultClient
if HTTPProxy != nil {
client = &http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(HTTPProxy),
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
}).DialContext,
},
}
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return Profile{}, err
}
req.Header.Set("Accept-Language", "en-US")
2020-12-03 21:42:16 +07:00
resp, err := client.Do(req)
if resp == nil {
return Profile{}, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return Profile{}, fmt.Errorf("response status: %s", resp.Status)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
2019-09-21 10:59:45 +03:00
if err != nil {
return Profile{}, err
}
// parse join date text
screenName := doc.Find(".screen-name").First().Text()
2019-09-21 10:59:45 +03:00
2020-08-10 14:08:35 +03:00
// check is username valid
if screenName == "" {
2020-08-10 14:08:35 +03:00
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
}
2019-09-21 10:59:45 +03:00
return Profile{
Avatar: doc.Find("td.avatar > img").First().AttrOr("src", ""),
Biography: strings.TrimSpace(doc.Find(".bio").First().Text()),
FollowersCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(3) > a > div.statnum").First().Text()),
FollowingCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(2) > a > div.statnum").First().Text()),
IsPrivate: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "protected"),
IsVerified: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "verified"),
Location: strings.TrimSpace(doc.Find(".location").First().Text()),
Name: strings.TrimSpace(doc.Find(".fullname").First().Text()),
TweetsCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(1) > div.statnum").First().Text()),
URL: "https://twitter.com/" + screenName,
Username: screenName,
Website: strings.TrimSpace(doc.Find("div.url > div > a").First().AttrOr("data-url", "")),
2019-09-21 10:59:45 +03:00
}, nil
}
// parseCount converts a counter as printed on the page, such as "1,234",
// into an int. Surrounding whitespace and comma separators are ignored.
// Text that still does not parse as a base-10 integer yields 0.
func parseCount(str string) int {
	digits := strings.Replace(strings.TrimSpace(str), ",", "", -1)
	n, err := strconv.Atoi(digits)
	if err != nil {
		// Scraping is best-effort: an unreadable counter is reported as 0
		// rather than failing the whole profile.
		return 0
	}
	return n
}