Skip to content

Commit

Permalink
coolstuffinc/sealed: Add a retail scraper too
Browse files Browse the repository at this point in the history
  • Loading branch information
kodawah committed Oct 10, 2023
1 parent eb347a5 commit 961eb02
Show file tree
Hide file tree
Showing 4 changed files with 312 additions and 33 deletions.
107 changes: 85 additions & 22 deletions coolstuffinc/preprocess.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,10 +488,20 @@ var sealedRenames = map[string]string{

"Starter - Booster Box": "Starter 1999 Booster Box",
"Starter - Booster Pack": "Starter 1999 Booster Pack",

"Mystery Booster (Convention Edition) - Booster Box": "Mystery Booster Booster Box (Convention Edition)",
"Mystery Booster (Convention Edition) - Booster Pack": "Mystery Booster Booster Pack (Convention Edition)",
"Mystery Booster (Convention Edition 2021) - Booster Box": "Mystery Booster Booster Box (Convention Edition - 2021)",
"Mystery Booster (Convention Edition 2021) - Booster Pack": "Mystery Booster Booster Pack (Convention Edition - 2021)",

"Secret Lair Drop Series - Ultimate Edition": "Secret Lair Ultimate Edition Box",
"Phyrexia: All Will Be One - Bundle: Compleat Edition": "Phyrexia All Will Be One Compleat Bundle",
}

func preprocessSealed(productName, edition string) (string, error) {
switch edition {
case "Mystery Booster - The List":
edition = "MB1"
case "World Championship Decks":
// WCD products are merged in a single edition in mtgjson
edition = "WC97"
Expand All @@ -502,57 +512,110 @@ func preprocessSealed(productName, edition string) (string, error) {
productName = strings.TrimSuffix(productName, "Deck")
productName = year + " " + productName
}
case "Secret Lair":
if strings.Contains(productName, "Ultimate") {
edition = "SLU"
} else {
edition = "SLD"
}
case "Draft", "Magic":
return "", errors.New("unsupported")
default:
if strings.Contains(productName, "Challenger Deck") {
edition = ""
}
}

// If edition is empty, do not return and instead loop through
var setCode string
set, err := mtgmatcher.GetSetByName(edition)
if err != nil {
return "", err
if edition != "" {
return "", err
}
} else {
setCode = set.Code
}

productName = strings.TrimSuffix(productName, " (1)")
productName = strings.Replace(productName, "(6)", "Case", 1)

rename, found := sealedRenames[productName]
if found {
productName = rename
}

switch {
case strings.Contains(productName, "Life Counter"):
case strings.Contains(productName, "Life Counter"),
strings.Contains(productName, "Booster Box (3)"),
strings.Contains(productName, "Scene Box"),
strings.Contains(productName, "Player's Guide"),
strings.Contains(productName, "Bundle Card Box"),
strings.Contains(productName, "D20 Set"),
strings.Contains(productName, "Born of the Gods - Japanese"),
strings.Contains(productName, "Variety Pack"):
return "", errors.New("unsupported")
case strings.HasPrefix(productName, "From the Vault"),
strings.HasPrefix(productName, "Signature Spellbook"):
productName = strings.TrimSuffix(productName, " - Box Set")
}

var uuid string
for _, sealedProduct := range set.SealedProduct {
if mtgmatcher.SealedEquals(sealedProduct.Name, productName) {
uuid = sealedProduct.UUID
break
for _, set := range mtgmatcher.GetSets() {
if setCode != "" && setCode != set.Code {
continue
}
}

if uuid == "" {
for _, sealedProduct := range set.SealedProduct {
// If not found, check whether a chunk of the name is present in the deck name
switch {
case strings.Contains(productName, "Archenemy"),
strings.Contains(productName, "uels of the Planeswalkers "),
strings.Contains(productName, "Planechase"):
decks, found := sealedProduct.Contents["deck"]
if found {
for _, deck := range decks {
// Work around internal names that are too long, like
// "Teeth of the Predator - the Garruk Wildspeaker Deck"
deckName := strings.Split(deck.Name, " - ")[0]
if mtgmatcher.SealedContains(productName, deckName) {
uuid = sealedProduct.UUID
break
if mtgmatcher.SealedEquals(sealedProduct.Name, productName) {
uuid = sealedProduct.UUID
break
}
}

if uuid == "" {
for _, sealedProduct := range set.SealedProduct {
// If not found, check whether a chunk of the name is present in the deck name
switch {
case strings.Contains(productName, "Archenemy"),
strings.Contains(productName, "Duels of the Planeswalkers"),
strings.Contains(productName, "Commander"),
strings.Contains(productName, "Secret Lair"),
strings.Contains(productName, "Planechase"):
decks, found := sealedProduct.Contents["deck"]
if found {
for _, deck := range decks {
// Work around internal names that are too long, like
// "Teeth of the Predator - the Garruk Wildspeaker Deck"
deckName := strings.Split(deck.Name, " - ")[0]
if mtgmatcher.SealedContains(productName, deckName) {
uuid = sealedProduct.UUID
break
}
// Secret Lair deck names may end in " foil": retry with the lowercased name and that suffix removed
deckName = strings.TrimSuffix(strings.ToLower(deckName), " foil")
if mtgmatcher.SealedContains(productName, deckName) {
uuid = sealedProduct.UUID
break
}
}
}
}
if uuid != "" {
break
}
}
}

// Last chance (in case edition is known)
if uuid == "" && setCode != "" && len(set.SealedProduct) == 1 {
uuid = set.SealedProduct[0].UUID
}

if uuid != "" {
break
}

}

return uuid, nil
Expand Down
179 changes: 178 additions & 1 deletion coolstuffinc/sealed.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package coolstuffinc

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/url"
"strconv"
Expand All @@ -19,16 +21,19 @@ type CoolstuffincSealed struct {
LogCallback mtgban.LogCallbackFunc
Partner string

inventoryDate time.Time
buylistDate time.Time
MaxConcurrency int

buylist mtgban.BuylistRecord
inventory mtgban.InventoryRecord
buylist mtgban.BuylistRecord

httpclient *http.Client
}

func NewScraperSealed() *CoolstuffincSealed {
csi := CoolstuffincSealed{}
csi.inventory = mtgban.InventoryRecord{}
csi.buylist = mtgban.BuylistRecord{}
csi.httpclient = http.NewClient()
csi.httpclient.Logger = nil
Expand All @@ -42,6 +47,177 @@ func (csi *CoolstuffincSealed) printf(format string, a ...interface{}) {
}
}

// sealedURL is the first page (page=1) of the search listing scraped for
// sealed products. numOfPages fetches it as-is, while processSealedPage
// overrides the "page" query parameter to request the other pages.
// NOTE(review): "sb=price|desc" presumably sorts by descending price — confirm.
const sealedURL = "https://www.coolstuffinc.com/sq/1555582?page=1&sb=price|desc"

// numOfPages fetches the first page of the sealed listing and derives the
// number of result pages from its results summary.
//
// The summary is expected to read "1 - <perPage> of <total> Results". After
// stripping the "1 - " prefix and everything from " Results" onwards, the
// remainder must split into exactly two " of "-separated integers, otherwise
// an "unknown page format" error is returned. Errors from the HTTP request,
// the HTML parser and the integer conversions are returned unchanged.
func (csi *CoolstuffincSealed) numOfPages() (int, error) {
	resp, err := csi.httpclient.Get(sealedURL)
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return 0, err
	}

	text := doc.Find(".search-result-links").Text()
	text = strings.TrimPrefix(strings.Split(text, " Results")[0], "1 - ")

	fields := strings.Split(text, " of ")
	if len(fields) != 2 {
		return 0, errors.New("unknown page format")
	}

	resultsPerPage, err := strconv.Atoi(fields[0])
	if err != nil {
		return 0, err
	}

	resultsTotal, err := strconv.Atoi(fields[1])
	if err != nil {
		return 0, err
	}

	// A non-positive page size cannot be divided by; report it as a malformed
	// summary instead of panicking on a division by zero.
	if resultsPerPage <= 0 {
		return 0, errors.New("unknown page format")
	}

	// Ceiling division: "total/perPage + 1" would request one extra page
	// whenever the total is an exact multiple of the page size.
	return (resultsTotal + resultsPerPage - 1) / resultsPerPage, nil
}

// processSealedPage downloads one page of the sealed listing and sends an
// inventory entry on channel for every product it manages to identify.
//
// The page URL is sealedURL with its "page" query parameter replaced by page.
// Only failures to build the URL, perform the request or parse the HTML are
// returned; per-product problems are logged through csi.printf (except the
// "unsupported" error, which is dropped silently) and the product is skipped.
func (csi *CoolstuffincSealed) processSealedPage(channel chan<- responseChan, page int) error {
	pageURL, err := url.Parse(sealedURL)
	if err != nil {
		return err
	}

	query := pageURL.Query()
	query.Set("page", fmt.Sprint(page))
	pageURL.RawQuery = query.Encode()

	resp, err := csi.httpclient.Get(pageURL.String())
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return err
	}

	doc.Find(".main-container").Each(func(_ int, item *goquery.Selection) {
		productName := item.Find(`span[itemprop="name"]`).Text()
		edition := strings.TrimPrefix(
			item.Find(`div[class="breadcrumb-trail"]`).Text(),
			"Magic: The Gathering » ",
		)

		uuid, err := preprocessSealed(productName, edition)

		// Commander products listed under an edition that does not mention
		// Commander get a second attempt with " Commander" appended.
		unresolved := err != nil || uuid == ""
		if unresolved && strings.Contains(productName, "Commander") && !strings.Contains(edition, "Commander") {
			uuid, err = preprocessSealed(productName, edition+" Commander")
		}

		switch {
		case err != nil && err.Error() == "unsupported":
			// Deliberately ignored products are not worth a log line.
			return
		case err != nil:
			csi.printf("%s in %s | %s", productName, edition, err.Error())
			return
		case uuid == "":
			csi.printf("unable to parse %s in %s", productName, edition)
			return
		}

		// The quantity may carry a trailing "+"; when what remains is not a
		// number, the entry falls back to a quantity of 20.
		qtyText := strings.TrimSuffix(item.Find(`span[class="card-qty"]`).Text(), "+")
		qty, convErr := strconv.Atoi(qtyText)
		if convErr != nil {
			qty = 20
		}

		price, parseErr := strconv.ParseFloat(item.Find(`b[itemprop="price"]`).Text(), 64)
		if parseErr != nil {
			csi.printf("%s: %s", productName, parseErr.Error())
			return
		}

		// A missing href simply yields the bare site URL.
		href, _ := item.Find(`a[class="productLink"]`).Attr("href")

		channel <- responseChan{
			cardId: uuid,
			invEntry: &mtgban.InventoryEntry{
				Price:    price,
				Quantity: qty,
				URL:      "https://coolstuffinc.com" + href,
			},
		}
	})

	return nil
}

// scrape walks every page of the sealed listing and fills csi.inventory.
//
// The page count comes from numOfPages; its error is the only one returned.
// Pages are handed to a pool of worker goroutines that call
// processSealedPage, and the entries they emit are added to the inventory by
// the calling goroutine. Per-page and per-entry failures are logged through
// csi.printf and do not stop the run. On completion inventoryDate is set to
// the current time.
func (csi *CoolstuffincSealed) scrape() error {
	totalPages, err := csi.numOfPages()
	if err != nil {
		return err
	}
	csi.printf("Processing %d pages", totalPages)

	// With no workers nothing would ever read from pages: the feeder below
	// would block forever, results would never be closed and this function
	// would hang. Always run at least one worker.
	workers := csi.MaxConcurrency
	if workers < 1 {
		workers = 1
	}

	pages := make(chan int)
	results := make(chan responseChan)
	var wg sync.WaitGroup

	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for page := range pages {
				if err := csi.processSealedPage(results, page); err != nil {
					csi.printf("page %d: %s", page, err.Error())
				}
			}
		}()
	}

	// Feed the page numbers, then close results once every worker is done so
	// the collection loop below terminates.
	go func() {
		for i := 1; i <= totalPages; i++ {
			pages <- i
		}
		close(pages)

		wg.Wait()
		close(results)
	}()

	for record := range results {
		if err := csi.inventory.Add(record.cardId, record.invEntry); err != nil {
			csi.printf("%s", err.Error())
		}
	}

	csi.inventoryDate = time.Now()

	return nil
}

// Inventory returns the scraped sealed inventory. A scrape is run only when
// the stored inventory is empty; otherwise the existing record is returned
// as-is. If the scrape fails, its error is returned with a nil record.
func (csi *CoolstuffincSealed) Inventory() (mtgban.InventoryRecord, error) {
	if len(csi.inventory) == 0 {
		if err := csi.scrape(); err != nil {
			return nil, err
		}
	}
	return csi.inventory, nil
}

func (csi *CoolstuffincSealed) processPage(channel chan<- responseChan, edition string) error {
resp, err := csi.httpclient.PostForm(csiBuylistURL, url.Values{
"ajaxtype": {"selectProductSetName2"},
Expand Down Expand Up @@ -201,6 +377,7 @@ func (csi *CoolstuffincSealed) Buylist() (mtgban.BuylistRecord, error) {
func (csi *CoolstuffincSealed) Info() (info mtgban.ScraperInfo) {
info.Name = "Cool Stuff Inc"
info.Shorthand = "CSISealed"
info.InventoryTimestamp = &csi.inventoryDate
info.BuylistTimestamp = &csi.buylistDate
info.SealedMode = true
return
Expand Down
Loading

0 comments on commit 961eb02

Please sign in to comment.