post - how to authenticate a shibboleth multi-hostname website with httr in R -
Note: IPUMS International and IPUMS USA use the same system. IPUMS USA allows quicker signup. If you would like to test out the code, try https://usa.ipums.org/usa-action/users/request_access to sign up!
I am trying to programmatically download a file from https://international.ipums.org/ with the R language and httr. I need to use httr and not RCurl because, after authenticating, I need to download large files not into RAM but directly to disk. This is possible with httr,
as far as I know.
The reproducible code below documents my best effort at getting from the login page (https://international.ipums.org/international-action/users/login) to the main post-authentication page. Any tips or hints would be appreciated! Thanks!
# Attempt to log in to IPUMS International through its Shibboleth identity
# provider with httr. This script documents an attempt that does NOT succeed:
# every request below ends on an error page or on the logged-out menu.
#
# NOTE(review): this listing was recovered from lowercased text. R identifiers
# (GET, POST, TRUE, 0L, writeBin, readLines) have been restored to their real
# spelling. String literals (URLs, cookie domains, field names) are left
# exactly as found and may need their original letter case restored before
# they match the live service -- verify against the site.

my_email <- "email@address.com"
my_password <- "password"

# scratch file used to dump each response body so it can be read as text
tf <- tempfile()

# use httr, because a large file has to be downloaded after authentication
# and httr supports the `write_disk()` option
library(httr)

# turn off ssl verify, otherwise the subsequent command will fail
set_config(config(ssl_verifypeer = 0L))

GET("https://international.ipums.org/shibboleth.sso/login?target=https%3a%2f%2finternational.ipums.org%2finternational-action%2fmenu")

# connect to the starting login page of the website; the response is kept in
# `a` because its cookies are read below
(a <- GET(
  "https://international.ipums.org/international-action/users/login",
  verbose(info = TRUE)
))

# which takes me through a lot of websites, and (in the browser) lands at
shibboleth_url <- "https://live.identity.popdata.org:443/idp/authn/userpassword"

# construct authentication information?
base_values <- list(
  "j_username" = my_email,
  "j_password" = my_password
)

idp_values <- list(
  "j_username" = my_email,
  "j_password" = my_password,
  "_idp_authn_lc_key" =
    subset(a$cookies, domain == "live.identity.popdata.org")$value,
  "jsessionid" =
    subset(a$cookies, domain == "#httponly_live.identity.popdata.org")$value
)

ipums_values <- list(
  "j_username" = my_email,
  "j_password" = my_password,
  "_idp_authn_lc_key" =
    subset(a$cookies, domain == "live.identity.popdata.org")$value,
  "jsessionid" =
    subset(a$cookies, domain == "international.ipums.org")$value
)

# I believe this is where the main login should happen, but it looks like
# it's failing
GET(shibboleth_url, query = idp_values)
POST(shibboleth_url, body = base_values)

writeBin(GET(shibboleth_url, query = idp_values)$content, tf)
readLines(tf)
# mpc account authentication system has encountered error
# error can occur if did not close browser after logging out of application
# previously. may occur other reasons. please close browser , try action again."

sso_url <- "https://live.identity.popdata.org/idp/profile/saml2/redirect/sso"

writeBin(GET(sso_url, query = idp_values)$content, tf)
POST(sso_url, body = idp_values)
readLines(tf)
# same error as above

# return to the main login page..
writeBin(
  GET(
    "https://international.ipums.org/international-action/menu",
    query = ipums_values
  )$content,
  tf
)
readLines(tf)
# ..not logged in
You have to use `set_cookies()`
to send your cookies to the server:
# Log in to IPUMS International through its Shibboleth identity provider by
# replaying the cookies from each response on the next request with
# `set_cookies()`, then fetch the post-authentication menu page.
#
# Expects `my_email` and `my_password` to be defined by the caller.
#
# NOTE(review): this listing was recovered from lowercased text. R identifiers
# (GET, POST, TRUE, 0L, writeBin, readLines) have been restored to their real
# spelling. String literals and cookie / form-field names are left exactly as
# found and may need their original letter case restored before they match
# the live service -- verify against the site.

library(httr)
library(rvest)

# my_email <- "xxx"
# my_password <- "yyy"

# scratch file used to dump the final page so it can be read as text
tf <- tempfile()

set_config(config(ssl_verifypeer = 0L))

# first page
p1 <- GET(
  "https://international.ipums.org/international-action/users/login",
  verbose(info = TRUE)
)

# session cookie issued on the first request; reused for the next two posts
idp_jsessionid <- p1$cookies[
  p1$cookies$domain == "#httponly_live.identity.popdata.org",
]$value

# post login credentials to the URL the first request ended on
b2 <- list(
  "j_username" = my_email,
  "j_password" = my_password
)
c2 <- c(
  jsessionid = idp_jsessionid,
  `_idp_authn_lc_key` =
    p1$cookies[p1$cookies$domain == "live.identity.popdata.org", ]$value
)
p2 <- POST(p1$url, body = b2, set_cookies(.cookies = c2), encode = "form")

# parse hidden fields from the form returned after posting the credentials
h2 <- read_html(p2$content)
form <- html_form(h2)

# post hidden fields (first two fields of the first form) to the form's URL
b3 <- list(
  "relaystate" = form[[1]]$fields[[1]]$value,
  "samlresponse" = form[[1]]$fields[[2]]$value
)
c3 <- c(
  jsessionid = idp_jsessionid,
  `_idp_session` = p2$cookies[p2$cookies$name == "_idp_session", ]$value,
  `_idp_authn_lc_key` =
    p2$cookies[p2$cookies$name == "_idp_authn_lc_key", ]$value
)
p3 <- POST(form[[1]]$url, body = b3, set_cookies(.cookies = c3), encode = "form")

# interesting page
# The row mask must be elementwise (`&`) and built from p3's own cookie table.
# A scalar `&&` across two different tables selects all rows or none on older
# R and is an error on R >= 4.3.
is_ipums_session <-
  p3$cookies$domain == "international.ipums.org" &
  p3$cookies$name == "jsessionid"
c4 <- c(
  jsessionid = p3$cookies[is_ipums_session, ]$value,
  `_idp_session` = p3$cookies[p3$cookies$name == "_idp_session", ]$value,
  `_idp_authn_lc_key` =
    p3$cookies[p3$cookies$name == "_idp_authn_lc_key", ]$value
)
p4 <- GET(
  "https://international.ipums.org/international-action/menu",
  set_cookies(.cookies = c4)
)

# dump the page and show the line that holds the logout link when logged in
writeBin(p4$content, tf)
readLines(tf)[55]
Since the result is
[1] " <li class=\"lastitem\"><a href=\"/international-action/users/logout\">logout</a></li>"
i think you're logged in...
Comments
Post a Comment