Scraping Counter Strike Match History
Seit ein paar Jahren ist Steam verpflichtet, alle persönlichen Daten, die über einen gespeichert sind, offenzulegen. Das schließt auch den Spielverlauf mit ein. Über diesen Weg kann man die eigene Counter-Strike-Match-Historie auslesen und auswerten.
# Extract raw tables ----
# Every match is rendered as two tables:
#   * left:  general information about the game, e.g. duration and map
#   * right: information about the players, like the in-game scoreboard
# The saved page is parsed once and both selectors run against that same
# document, instead of reading and parsing the file twice.
steam_page <- read_html("./data/steam_data.html")

right_html_data <- steam_page %>%
  html_nodes(".csgo_scoreboard_inner_right") %>%
  html_table()

left_html_data <- steam_page %>%
  html_nodes(".csgo_scoreboard_inner_left") %>%
  html_table()
#' Tidy the scoreboard (right-hand table) of a single match
#'
#' @param right_table Right-hand table of one match as produced by
#'   `html_table()`. Expected layout: `team_size` player rows, then one row
#'   whose `Score` column carries the final score, then the remaining player
#'   rows. Column 6 is treated as the MVP column.
#' @param team_size Number of player rows before the score row; the score row
#'   is therefore row `team_size + 1`. Defaults to 5, which reproduces the
#'   previous hard-coded behaviour.
#' @return A tibble with the score row removed and snake_case column names,
#'   extended by `team_id` (1 for rows up to `team_size`, 2 afterwards),
#'   `score_t1` / `score_t2` (first and second number found in the score row,
#'   kept as character) and with `mvp` and `hsp` parsed to numbers.
extract_right_side <- function(right_table, team_size = 5) {
  score_row <- team_size + 1

  # The final score sits in the separator row between the two teams
  final_score <- right_table %>%
    filter(row_number() == score_row) %>%
    pull(Score) %>%
    str_extract_all("\\d+")

  right_table %>%
    as_tibble() %>%
    rename(mvp = 6) %>%
    clean_names() %>%
    mutate(
      # Team membership is derived from the row position, so it has to be
      # computed while the score row is still part of the table
      team_id = ifelse(row_number() <= team_size, 1, 2),
      score_t1 = final_score[[1]][1],
      score_t2 = final_score[[1]][2],
      # A lone star glyph (U+2605) counts as 1; a glyph followed by a number
      # yields that number; values without any number end up as 0
      mvp = str_replace(mvp, "\\u2605", "star"),
      mvp = ifelse(mvp == "star", 1, mvp),
      mvp = parse_number(mvp),
      mvp = ifelse(is.na(mvp), 0, mvp),
      hsp = parse_number(hsp)
    ) %>%
    filter(row_number() != score_row)
}
#' Build the per-player metadata rows (left-hand table) of a single match
#'
#' @param left_table Left-hand table of one match as produced by
#'   `html_table()`. Only its first column is read: row 1 is stored as the
#'   map, row 2 as the timestamp, and rows 3 and 4 are searched for a
#'   "digits:digits" value (wait time and match duration).
#' @param players_per_match How often the single metadata row is repeated so
#'   that it lines up row-by-row with the scoreboard of the same match.
#'   Defaults to 10, which reproduces the previous hard-coded behaviour.
#' @return A tibble with `players_per_match` identical rows and the columns
#'   `map`, `timestamp`, `wait_time` and `match_duration`.
extract_left_side <- function(left_table, players_per_match = 10) {
  # `[[1]]` extracts the first column as a plain vector. The former
  # `left_table[1][i, 1]` only returns a scalar for base data.frames; on a
  # tibble `[` never drops, so each value would stay a 1x1 tibble and end up
  # as a nested column.
  info <- left_table[[1]]

  tibble(
    map = info[1],
    timestamp = info[2],
    wait_time = str_extract(info[3], "\\d+:\\d+"),
    match_duration = str_extract(info[4], "\\d+:\\d+")
  ) %>%
    slice(rep(1L, times = players_per_match))
}
# Run the per-match extractors over every scraped table and stack the results
right_data <- bind_rows(lapply(right_html_data, extract_right_side))
left_data <- bind_rows(lapply(left_html_data, extract_left_side))

# Metadata and scoreboard rows are glued together purely by position; the
# integer division by 10 then gives every block of ten rows its own id,
# starting at 0
cs_data <- mutate(
  bind_cols(left_data, right_data),
  game_id = (row_number() - 1) %/% 10
)

cs_data
## # A tibble: 2,210 x 16
## map timestamp wait_time match_duration player_name ping k a d
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Comp~ 2020-05-~ 00:12 32:31 Sergio Mar~ 43 17 5 18
## 2 Comp~ 2020-05-~ 00:12 32:31 Dadix 45 13 4 21
## 3 Comp~ 2020-05-~ 00:12 32:31 dex6y 30 10 6 19
## 4 Comp~ 2020-05-~ 00:12 32:31 duk5x 35 12 0 20
## 5 Comp~ 2020-05-~ 00:12 32:31 <U+2764> St<U+03AD>fa<U+03BD> -~ 48 10 1 18
## 6 Comp~ 2020-05-~ 00:12 32:31 /SlasH/ 57 26 5 12
## 7 Comp~ 2020-05-~ 00:12 32:31 ShokoN 31 21 4 13
## 8 Comp~ 2020-05-~ 00:12 32:31 CERNVNN0S 24 16 4 13
## 9 Comp~ 2020-05-~ 00:12 32:31 add instag~ 17 17 2 14
## 10 Comp~ 2020-05-~ 00:12 32:31 Awesome 49 14 2 12
## # ... with 2,200 more rows, and 7 more variables: mvp <dbl>, hsp <dbl>,
## # score <chr>, team_id <dbl>, score_t1 <chr>, score_t2 <chr>, game_id <dbl>