2019-09-21 10:59:45 +03:00
|
|
|
package twitterscraper
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
2020-12-03 21:42:16 +07:00
|
|
|
"net"
|
2020-05-14 18:00:43 +02:00
|
|
|
"net/http"
|
2019-09-21 10:59:45 +03:00
|
|
|
"strconv"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
|
)
|
|
|
|
|
|
2020-05-14 21:52:55 +03:00
|
|
|
// Profile holds the information scraped from a Twitter user's profile page.
//
// GetProfile in this file fills only part of the struct: Banner, Birthday,
// Joined, LikesCount and UserID are not set there and keep their zero values
// unless some other code populates them.
type Profile struct {
	// Avatar is the src attribute of the profile's avatar image.
	Avatar string

	// Banner is presumably the banner image URL; not set by GetProfile.
	Banner string

	// Biography is the profile's bio text with surrounding whitespace removed.
	Biography string

	// Birthday is presumably the user's birthday as text; not set by GetProfile.
	Birthday string

	// FollowersCount is the follower count shown in the profile stats table.
	FollowersCount int

	// FollowingCount is the following count shown in the profile stats table.
	FollowingCount int

	// IsPrivate reports whether the profile badge image is the "protected" one.
	IsPrivate bool

	// IsVerified reports whether the profile badge image is the "verified" one.
	IsVerified bool

	// Joined is presumably the account creation time; not set by GetProfile,
	// so it may be nil.
	Joined *time.Time

	// LikesCount is presumably the number of liked tweets; not set by GetProfile.
	LikesCount int

	// Location is the profile's location text with surrounding whitespace removed.
	Location string

	// Name is the profile's full (display) name.
	Name string

	// TweetsCount is the tweet count shown in the profile stats table.
	TweetsCount int

	// URL is the profile address, "https://twitter.com/" followed by Username.
	URL string

	// UserID is presumably Twitter's identifier for the user; not set by GetProfile.
	UserID string

	// Username is the screen name as shown on the profile page.
	Username string

	// Website is the data-url attribute of the link in the profile's URL section.
	Website string
}
|
|
|
|
|
|
2020-05-14 21:52:55 +03:00
|
|
|
// GetProfile return parsed user profile.
|
2019-09-21 10:59:45 +03:00
|
|
|
func GetProfile(username string) (Profile, error) {
|
2020-08-13 17:38:46 +03:00
|
|
|
url := "https://mobile.twitter.com/" + username
|
2020-05-14 18:00:43 +02:00
|
|
|
|
2020-12-03 21:42:16 +07:00
|
|
|
client := http.DefaultClient
|
|
|
|
|
if HTTPProxy != nil {
|
|
|
|
|
client = &http.Client{
|
|
|
|
|
Transport: &http.Transport{
|
|
|
|
|
Proxy: http.ProxyURL(HTTPProxy),
|
|
|
|
|
DialContext: (&net.Dialer{
|
|
|
|
|
Timeout: 10 * time.Second,
|
|
|
|
|
}).DialContext,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-13 17:38:46 +03:00
|
|
|
req, err := http.NewRequest("GET", url, nil)
|
2020-05-14 18:00:43 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return Profile{}, err
|
|
|
|
|
}
|
2020-08-13 17:38:46 +03:00
|
|
|
req.Header.Set("Accept-Language", "en-US")
|
2020-05-14 18:00:43 +02:00
|
|
|
|
2020-12-03 21:42:16 +07:00
|
|
|
resp, err := client.Do(req)
|
2020-05-14 21:52:55 +03:00
|
|
|
if resp == nil {
|
2020-05-14 18:00:43 +02:00
|
|
|
return Profile{}, err
|
|
|
|
|
}
|
2020-05-14 21:52:55 +03:00
|
|
|
defer resp.Body.Close()
|
2020-05-14 18:00:43 +02:00
|
|
|
|
2020-05-14 21:52:55 +03:00
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return Profile{}, fmt.Errorf("response status: %s", resp.Status)
|
|
|
|
|
}
|
2020-05-14 18:00:43 +02:00
|
|
|
|
2020-05-14 21:52:55 +03:00
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
2019-09-21 10:59:45 +03:00
|
|
|
if err != nil {
|
|
|
|
|
return Profile{}, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// parse join date text
|
2020-08-13 17:38:46 +03:00
|
|
|
screenName := doc.Find(".screen-name").First().Text()
|
2019-09-21 10:59:45 +03:00
|
|
|
|
2020-08-10 14:08:35 +03:00
|
|
|
// check is username valid
|
2020-08-13 17:38:46 +03:00
|
|
|
if screenName == "" {
|
2020-08-10 14:08:35 +03:00
|
|
|
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-21 10:59:45 +03:00
|
|
|
return Profile{
|
2020-08-13 17:38:46 +03:00
|
|
|
Avatar: doc.Find("td.avatar > img").First().AttrOr("src", ""),
|
|
|
|
|
Biography: strings.TrimSpace(doc.Find(".bio").First().Text()),
|
|
|
|
|
FollowersCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(3) > a > div.statnum").First().Text()),
|
|
|
|
|
FollowingCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(2) > a > div.statnum").First().Text()),
|
|
|
|
|
IsPrivate: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "protected"),
|
|
|
|
|
IsVerified: strings.Contains(doc.Find("div.fullname > a.badge > img").First().AttrOr("src", ""), "verified"),
|
|
|
|
|
Location: strings.TrimSpace(doc.Find(".location").First().Text()),
|
|
|
|
|
Name: strings.TrimSpace(doc.Find(".fullname").First().Text()),
|
|
|
|
|
TweetsCount: parseCount(doc.Find("table.profile-stats > tbody > tr > td:nth-child(1) > div.statnum").First().Text()),
|
|
|
|
|
URL: "https://twitter.com/" + screenName,
|
|
|
|
|
Username: screenName,
|
|
|
|
|
Website: strings.TrimSpace(doc.Find("div.url > div > a").First().AttrOr("data-url", "")),
|
2019-09-21 10:59:45 +03:00
|
|
|
}, nil
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-13 17:38:46 +03:00
|
|
|
// parseCount converts a counter as rendered on the profile page, such as
// "1,234", into an int.
//
// Surrounding whitespace is trimmed and comma separators are removed before
// parsing. Text that is not a number (including the empty string) yields 0.
func parseCount(str string) (i int) {
	digits := strings.Replace(strings.TrimSpace(str), ",", "", -1)
	// The error is deliberately dropped: scraping is best effort, and Atoi
	// yields 0 for text that is not a number.
	i, _ = strconv.Atoi(digits)
	return i
}
|