post - How to authenticate a Shibboleth multi-hostname website with httr in R -


Note: IPUMS International and IPUMS USA use the same system. IPUMS USA allows quicker signup, so if you would like to test out the code, try https://usa.ipums.org/usa-action/users/request_access to sign up!

I am trying to programmatically download a file from https://international.ipums.org/ with the R language and httr. I need to use httr and not RCurl because, after authentication, I need to download large files directly to disk rather than into RAM. As far as I know, this is possible with httr but not with RCurl.

The reproducible code below documents my best effort at getting from the login page (https://international.ipums.org/international-action/users/login) to the main post-authentication page. Any tips or hints would be appreciated! Thanks!

# Attempt to log in to IPUMS International through its Shibboleth identity
# provider with httr. This is the question's (non-working) attempt: each step
# writes the response body to `tf` so it can be inspected with readLines().
#
# NOTE(review): this script was recovered from a copy in which all text had
# been lowercased. R identifiers (GET, POST, TRUE, 0L, writeBin, readLines),
# the cookie name JSESSIONID, the "#HttpOnly_" cookie-domain prefix and the
# Shibboleth URL paths are case-sensitive and have been restored to their
# standard spellings -- confirm against the live site before relying on them.

# Account credentials (placeholders; replace before running).
my_email <- "email@address.com"
my_password <- "password"

# Scratch file that receives response bodies.
tf <- tempfile()

# use httr, because a large file must be downloaded after authentication,
# and httr supports the `write_disk()` option
library(httr)

# turn off ssl verify, otherwise the subsequent GET command fails
# NOTE(review): this disables TLS certificate verification for every request.
set_config(config(ssl_verifypeer = 0L))

GET("https://international.ipums.org/Shibboleth.sso/Login?target=https%3a%2f%2finternational.ipums.org%2finternational-action%2fmenu")

# connect to the starting login page of the website; the outer parentheses
# print the response as well as storing it in `a`
(a <- GET(
  "https://international.ipums.org/international-action/users/login",
  verbose(info = TRUE)
))

# that takes me through a lot of websites, but (in a browser) lands at
shibboleth_url <- "https://live.identity.popdata.org:443/idp/Authn/UserPassword"

# construct authentication information?
base_values <- list("j_username" = my_email, "j_password" = my_password)

# credentials plus the identity provider's cookies from the first response
idp_values <- list(
  "j_username" = my_email,
  "j_password" = my_password,
  "_idp_authn_lc_key" =
    subset(a$cookies, domain == "live.identity.popdata.org")$value,
  "JSESSIONID" =
    subset(a$cookies, domain == "#HttpOnly_live.identity.popdata.org")$value
)

# same, but with the session cookie of the IPUMS host itself
ipums_values <- list(
  "j_username" = my_email,
  "j_password" = my_password,
  "_idp_authn_lc_key" =
    subset(a$cookies, domain == "live.identity.popdata.org")$value,
  "JSESSIONID" =
    subset(a$cookies, domain == "international.ipums.org")$value
)

# I believe this is where the main login should happen, but it looks like
# it's failing
GET(shibboleth_url, query = idp_values)
POST(shibboleth_url, body = base_values)
writeBin(GET(shibboleth_url, query = idp_values)$content, tf)

readLines(tf)
# The response reports (paraphrased from the recovered text):
# "The MPC account authentication system has encountered an error. This error
# can occur if you did not close your browser after logging out of an
# application previously. It may occur for other reasons. Please close your
# browser and try your action again."

writeBin(
  GET(
    "https://live.identity.popdata.org/idp/profile/SAML2/Redirect/SSO",
    query = idp_values
  )$content,
  tf
)
POST(
  "https://live.identity.popdata.org/idp/profile/SAML2/Redirect/SSO",
  body = idp_values
)
readLines(tf)
# same error as above

# return to the main login page..
writeBin(
  GET(
    "https://international.ipums.org/international-action/menu",
    query = ipums_values
  )$content,
  tf
)
readLines(tf)
# ..not logged in

You have to use set_cookies() to send your cookies to the server:

# Log in to IPUMS International through its Shibboleth identity provider by
# explicitly replaying cookies with set_cookies() at every step:
#   1. GET the login page (follows redirects to the identity provider),
#   2. POST the credentials to the URL the redirects ended on,
#   3. POST the hidden form fields of that response to the form's target,
#   4. GET the post-authentication menu page.
# Expects `my_email` and `my_password` to be defined already.
#
# NOTE(review): this script was recovered from a copy in which all text had
# been lowercased. R identifiers (GET, POST, TRUE, 0L, writeBin, readLines),
# the cookie name JSESSIONID, the "#HttpOnly_" cookie-domain prefix and the
# SAML form field names RelayState / SAMLResponse are case-sensitive and have
# been restored to their standard spellings -- confirm against the live site.

library(httr)
library(rvest)
# my_email <- "xxx"
# my_password <- "yyy"
tf <- tempfile()
# NOTE(review): this disables TLS certificate verification for every request.
set_config(config(ssl_verifypeer = 0L))

# first page
p1 <- GET(
  "https://international.ipums.org/international-action/users/login",
  verbose(info = TRUE)
)

# post login credentials
b2 <- list("j_username" = my_email, "j_password" = my_password)
c2 <- c(
  JSESSIONID =
    p1$cookies[p1$cookies$domain == "#HttpOnly_live.identity.popdata.org", ]$value,
  `_idp_authn_lc_key` =
    p1$cookies[p1$cookies$domain == "live.identity.popdata.org", ]$value
)
p2 <- POST(p1$url, body = b2, set_cookies(.cookies = c2), encode = "form")

# parse hidden fields
h2 <- read_html(p2$content)
form <- h2 %>% html_form()

# post hidden fields
# NOTE(review): `form[[1]]$url` and positional `fields[[i]]` follow the rvest
# version the answer was written against; newer rvest releases may name the
# form target differently -- confirm against the installed version.
b3 <- list(
  "RelayState" = form[[1]]$fields[[1]]$value,
  "SAMLResponse" = form[[1]]$fields[[2]]$value
)
c3 <- c(
  JSESSIONID =
    p1$cookies[p1$cookies$domain == "#HttpOnly_live.identity.popdata.org", ]$value,
  `_idp_session` = p2$cookies[p2$cookies$name == "_idp_session", ]$value,
  `_idp_authn_lc_key` = p2$cookies[p2$cookies$name == "_idp_authn_lc_key", ]$value
)
p3 <- POST(form[[1]]$url, body = b3, set_cookies(.cookies = c3), encode = "form")

# interesting page
# The row mask is built from p3's own cookie table with elementwise `&`:
# scalar `&&` is an error on length > 1 inputs in R >= 4.3, and a mask taken
# from p1$cookies would not line up with the rows of p3$cookies.
c4 <- c(
  JSESSIONID = p3$cookies[
    p3$cookies$domain == "international.ipums.org" &
      p3$cookies$name == "JSESSIONID",
  ]$value,
  `_idp_session` = p3$cookies[p3$cookies$name == "_idp_session", ]$value,
  `_idp_authn_lc_key` = p3$cookies[p3$cookies$name == "_idp_authn_lc_key", ]$value
)
p4 <- GET(
  "https://international.ipums.org/international-action/menu",
  set_cookies(.cookies = c4)
)
writeBin(p4$content, tf)
# line 55 of the menu page is where the answer found the logout link
readLines(tf)[55]

Since the result is

[1] "    <li class=\"lastitem\"><a href=\"/international-action/users/logout\">logout</a></li>" 

I think you're logged in...


Comments

Popular posts from this blog

get url and add instance to a model with prefilled foreign key :django admin -

css - Make div keyboard-scrollable in jQuery Mobile? -

ruby on rails - Seeing duplicate requests handled with Unicorn -