Scraping Counter Strike Match History

Seit ein paar Jahren ist Steam verpflichtet, alle persönlichen Daten offenzulegen, die über einen gespeichert sind. Das schließt auch den Spielverlauf mit ein. Über diesen Weg kann man den eigenen Counter-Strike-Matchverlauf auslesen.

# Extract raw tables ----
# Every match has two tables:
# - left: general information about the game, e.g. duration, map
# - right: information about the players, like the in-game scoreboard

# Parse the saved page once and reuse the parsed document for both selectors
# instead of reading and parsing the same file twice.
steam_page <- read_html("./data/steam_data.html")

# One table per match, selected by the right-hand scoreboard class.
right_html_data <- steam_page %>%
  html_nodes(".csgo_scoreboard_inner_right") %>%
  html_table()

# One table per match, selected by the left-hand scoreboard class.
left_html_data <- steam_page %>%
  html_nodes(".csgo_scoreboard_inner_left") %>%
  html_table()
# Turn one right-hand (scoreboard) table into a tidy tibble of player rows.
#
# Row 6 of the table is treated as the score row: the digit groups found in
# its `Score` cell become `score_t1` and `score_t2`, and the row itself is
# dropped from the result. Rows above it get `team_id` 1, rows below it 2.
#
# @param right_table One scoreboard table with a `Score` column and at least
#   six columns (the sixth one is renamed to `mvp`).
# @return A tibble with cleaned column names plus `team_id`, `score_t1` and
#   `score_t2`; `mvp` and `hsp` are parsed to numbers.
extract_right_side <- function(right_table) {
  separator_row <- 6

  # Pull the digit groups out of the score cell (the score is always in row 6).
  score_cell <- pull(filter(right_table, row_number() == separator_row), Score)
  team_scores <- str_extract_all(score_cell, "\\d+")

  # Normalise the column names; the sixth column is addressed by position
  # and renamed to `mvp` before the names are cleaned.
  scoreboard <- right_table %>%
    as_tibble() %>%
    rename(mvp = 6) %>%
    clean_names()

  scoreboard %>%
    mutate(
      team_id = ifelse(row_number() < separator_row, 1, 2),
      score_t1 = team_scores[[1]][1],
      score_t2 = team_scores[[1]][2]
    ) %>%
    mutate(
      # A cell that is exactly the star symbol becomes 1; every other cell is
      # parsed as a number, and cells without a number become 0.
      mvp = str_replace(mvp, "\\u2605", "star"),
      mvp = ifelse(mvp == "star", 1, mvp),
      mvp = parse_number(mvp),
      mvp = ifelse(is.na(mvp), 0, mvp),
      hsp = parse_number(hsp)
    ) %>%
    # Drop the score row now that its content lives in score_t1 / score_t2.
    filter(row_number() != separator_row)
}
# Build the per-match metadata rows from one left-hand table.
#
# Reads the first four cells of the table's first column (map, timestamp,
# wait time, match duration) and repeats the resulting row so that it can be
# bound column-wise to the player rows of the same match.
#
# @param left_table One left-hand table (data frame or tibble) whose first
#   column holds map, timestamp, wait time and match duration in rows 1-4.
# @param n_players Number of times the metadata row is repeated. Defaults to
#   10, the number of rows emitted per match before this parameter existed.
# @return A tibble with `n_players` identical rows and the columns `map`,
#   `timestamp`, `wait_time` and `match_duration`.
extract_left_side <- function(left_table, n_players = 10L) {
  # `[[1]]` extracts the first column as a plain vector for base data frames
  # and tibbles alike. The former `left_table[1][i, 1]` only produced a scalar
  # for base data frames; on a tibble it returns a 1x1 tibble.
  info <- left_table[[1]]

  tibble(
    map = info[1],
    timestamp = info[2],
    # Keep only the "mm:ss"-shaped part of the wait-time and duration cells.
    wait_time = str_extract(info[3], "\\d+:\\d+"),
    match_duration = str_extract(info[4], "\\d+:\\d+")
  ) %>%
    slice(rep(1L, times = n_players))
}
# Run both extractors over every match and stack the per-match results.
right_data <- bind_rows(lapply(right_html_data, extract_right_side))
left_data <- bind_rows(lapply(left_html_data, extract_left_side))

# Combine metadata and scoreboard column-wise (rows are matched by position),
# then number the matches: each consecutive run of 10 rows shares one
# game_id, starting at 0.
cs_data <- bind_cols(left_data, right_data) %>%
  mutate(game_id = (row_number() - 1) %/% 10)

cs_data
## # A tibble: 2,210 x 16
##    map   timestamp wait_time match_duration player_name ping  k     a     d    
##    <chr> <chr>     <chr>     <chr>          <chr>       <chr> <chr> <chr> <chr>
##  1 Comp~ 2020-05-~ 00:12     32:31          Sergio Mar~ 43    17    5     18   
##  2 Comp~ 2020-05-~ 00:12     32:31          Dadix       45    13    4     21   
##  3 Comp~ 2020-05-~ 00:12     32:31          dex6y       30    10    6     19   
##  4 Comp~ 2020-05-~ 00:12     32:31          duk5x       35    12    0     20   
##  5 Comp~ 2020-05-~ 00:12     32:31          <U+2764> St<U+03AD>fa<U+03BD> -~ 48    10    1     18   
##  6 Comp~ 2020-05-~ 00:12     32:31          /SlasH/     57    26    5     12   
##  7 Comp~ 2020-05-~ 00:12     32:31          ShokoN      31    21    4     13   
##  8 Comp~ 2020-05-~ 00:12     32:31          CERNVNN0S   24    16    4     13   
##  9 Comp~ 2020-05-~ 00:12     32:31          add instag~ 17    17    2     14   
## 10 Comp~ 2020-05-~ 00:12     32:31          Awesome     49    14    2     12   
## # ... with 2,200 more rows, and 7 more variables: mvp <dbl>, hsp <dbl>,
## #   score <chr>, team_id <dbl>, score_t1 <chr>, score_t2 <chr>, game_id <dbl>
Max Hübner
Max Hübner
Computer Science Student

Computer Science Student from Germany

Related