Mike Malecki, YouGov
cumulativechange
wtfm
head(df)
## caseid weight region fips gender age race4 educyrs marstat child18 pid3
## 1 1 0.46 1 42 1 54 1 18 5 2 1
## 2 2 2.09 1 42 1 77 1 10 1 2 2
## 3 3 1.54 3 24 1 27 1 12 6 2 1
## 4 4 1.08 3 51 2 28 1 14 1 1 2
## 5 5 1.39 3 54 1 21 1 14 5 2 2
## 6 6 0.27 1 42 2 79 1 16 4 2 1
## pid5 ideo5 date source reg income ordered.party y female race.female
## 1 1 1 20120105 gallup 1 600 1 1 0 1
## 2 5 4 20120105 gallup 1 600 5 5 0 1
## 3 1 1 20120105 gallup 1 600 1 1 0 1
## 4 5 4 20120105 gallup 1 600 5 5 1 5
## 5 5 4 20120105 gallup 1 600 5 5 0 1
## 6 1 2 20120105 gallup 1 600 1 1 1 5
## state edu.cat age.cat age.edu survey
## 1 PA 4 3 15 4
## 2 PA 1 4 4 4
## 3 MD 1 1 1 4
## 4 VA 2 1 5 4
## 5 WV 2 1 5 4
## 6 PA 3 4 12 4
Y ∼ ologit(μ[i], τ)
μ[i] ∼ ηstatestate[i]] + ηracerace[i] + ηeduedu[i] + …
state (51)
ηblock ∼ N(0, σblock)
ηstate ∼ N(αregionregion[state] + Xβstatepred[state], σstate)
m.Dem <- mrp( party==1 ~ race.female + age + edu + state,
data=df, add=list(
df.statepreds,
expression(race.edu <-
interaction(race.female, education))),
ml.formula=.~.+(1|race.edu) + statepred1 + statepred2 )
model{
for(i in 1:N){ ## loop over observations
## form the linear predictor (no intercept)
mu[i] <-
eta.race.female[race.female[i]]
+ eta.age[age.cat[i]]
+ eta.edu[edu.cat[i]]
+ eta.age.edu[age.edu[i]]
+ eta.poll[survey[i]]
+ eta.state[state[i]]
## for (j in group) {
## g.raw[j] ~ dnorm ( mu.g.raw, tau.g.raw )
## g[k] <- xi.g*(g.raw[j] - mean(g.raw[]) )
## }
## xi.g ~ dunif(0,100)
## mu.g.raw ~ dnorm
## tau.g.raw <- pow(sig.g.raw, -2)
## sigma.g.raw <- dunif ()
## sigma.g <- xi.g* sigma.g.raw
...
And you get back distributions like this. – houseeffects
predict()-like function for multilevel or mixed-effects models!arm::sim() does this for lme/blme-class point estimates. (lme-class and blme-class) fit through some kind of REML.model.matrixhead(xf.all)
## edu age race.female state
## 1 <HS 18-29 White M AK
## 2 HS 18-29 White M AK
## 3 Some 18-29 White M AK
## 4 Grad 18-29 White M AK
## 5 <HS 30-44 White M AK
## 6 HS 30-44 White M AK
head(xm.all)
## edu<HS eduHS eduSome eduGrad age18-29 age30-44 age45-64 age≥65
## 1 1 0 0 0 1 0 0 0
## 2 0 1 0 0 1 0 0 0
## 3 0 0 1 0 1 0 0 0
## 4 0 0 0 1 1 0 0 0
## 5 1 0 0 0 0 1 0 0
## 6 0 1 0 0 0 1 0 0
## race.femaleWhite M race.femaleBlack M race.femaleHispanic M
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## race.femaleOther M race.femaleWhite F race.femaleBlack F
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## race.femaleHispanic F race.femaleOther F stateAK stateAL stateAR stateAZ
## 1 0 0 1 0 0 0
## 2 0 0 1 0 0 0
## 3 0 0 1 0 0 0
## 4 0 0 1 0 0 0
## 5 0 0 1 0 0 0
## 6 0 0 1 0 0 0
## stateCA stateCO stateCT stateDC stateDE stateFL stateGA stateHI stateIA
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## stateID stateIL stateIN stateKS stateKY stateLA stateMA stateMD stateME
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## stateMI stateMN stateMO stateMS stateMT stateNC stateND stateNE stateNH
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## stateNJ stateNM stateNV stateNY stateOH stateOK stateOR statePA stateRI
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## stateSC stateSD stateTN stateTX stateUT stateVA stateVT stateWA stateWI
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## stateWV stateWY edu<HS:age18-29 eduHS:age18-29 eduSome:age18-29
## 1 0 0 1 0 0
## 2 0 0 0 1 0
## 3 0 0 0 0 1
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## eduGrad:age18-29 edu<HS:age30-44 eduHS:age30-44 eduSome:age30-44
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 1 0 0 0
## 5 0 1 0 0
## 6 0 0 1 0
## eduGrad:age30-44 edu<HS:age45-64 eduHS:age45-64 eduSome:age45-64
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## eduGrad:age45-64 edu<HS:age≥65 eduHS:age≥65 eduSome:age≥65
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## eduGrad:age≥65
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
jags.terms <- c(unlist(sapply(termnames, function(f, prefix = "eta", jagsvars = varnames(samples)) {
findme <- paste(prefix, f, sep = ".")
return(grep(paste0(findme, "\\["), jagsvars))
})))
str(termnames)
## chr [1:5] "edu" "age" "race.female" "state" "age.edu"
head(varnames(samples))
## [1] "B.state" "eta.age[1]" "eta.age[2]" "eta.age[3]"
## [5] "eta.age[4]" "eta.age.edu[1]"
head(jags.terms)
## edu1 edu2 edu3 edu4 age1 age2
## 22 23 24 25 2 3
fitted.full.m <- lapply(samples, function(c, ...) {
ans <- apply(c, 1, function(s, jags.terms) {
return(xm.all %*% s[jags.terms])
}, ...)
return(ans)
}, jags.terms)
taus.m <- lapply(samples, function(c) {
return(c[, c("tau[1]", "tau[2]", "tau[3]", "tau[4]")])
})
Now go from logit to unweighted predicted probabilities for all cells:
cellpred.m <- array(NA, dim = c(nrow(fitted.full.m[[1]]), nrow(taus.m[[1]]),
5), dimnames = list(cell = rownames(xf.all), samples = 1:nrow(taus.m[[1]]),
party = c("+D", "-d", "I", "-r", "+R")))
cellpred.m[, , 1] <- sapply(1:nrow(taus.m[[1]]), function(s) {
return(plogis(taus.m[[1]][s, 1] - fitted.full.m[[1]][, s]))
})
cellpred.m[, , 2] <- sapply(1:nrow(taus.m[[1]]), function(s) {
return(plogis(taus.m[[1]][s, 2] - fitted.full.m[[1]][, s]) - plogis(taus.m[[1]][s,
1] - fitted.full.m[[1]][, s]))
})
cellpred.m[, , 3] <- sapply(1:nrow(taus.m[[1]]), function(s) {
return(plogis(taus.m[[1]][s, 3] - fitted.full.m[[1]][, s]) - plogis(taus.m[[1]][s,
2] - fitted.full.m[[1]][, s]))
})
cellpred.m[, , 4] <- sapply(1:nrow(taus.m[[1]]), function(s) {
return(plogis(taus.m[[1]][s, 4] - fitted.full.m[[1]][, s]) - plogis(taus.m[[1]][s,
3] - fitted.full.m[[1]][, s]))
})
cellpred.m[, , 5] <- sapply(1:nrow(taus.m[[1]]), function(s) {
return(1 - plogis(taus.m[[1]][s, 4] - fitted.full.m[[1]][, s]))
})
out <- apply(party.post.array, c("edu", "party"), sum)/apply(Census.NWay, c("edu",
"party"), sum)
print(xtable(out), type = "html", digits = 2)
| +D | -d | I | -r | +R | |
|---|---|---|---|---|---|
| < HS | 0.37 | 0.14 | 0.10 | 0.14 | 0.26 |
| HS | 0.33 | 0.14 | 0.10 | 0.15 | 0.29 |
| Some | 0.31 | 0.14 | 0.10 | 0.15 | 0.29 |
| Grad | 0.35 | 0.14 | 0.10 | 0.14 | 0.26 |
out <- apply(party.post.array, c("state", "party"), sum)/apply(Census.NWay,
c("state", "party"), sum)
print(xtable(out), type = "html", digits = 2)
| +D | -d | I | -r | +R | |
|---|---|---|---|---|---|
| AK | 0.27 | 0.14 | 0.10 | 0.16 | 0.32 |
| AL | 0.31 | 0.12 | 0.09 | 0.14 | 0.34 |
| AR | 0.34 | 0.14 | 0.10 | 0.14 | 0.28 |
| AZ | 0.30 | 0.14 | 0.10 | 0.15 | 0.29 |
| CA | 0.38 | 0.15 | 0.10 | 0.14 | 0.23 |
| CO | 0.29 | 0.14 | 0.10 | 0.16 | 0.31 |
| CT | 0.40 | 0.15 | 0.10 | 0.13 | 0.23 |
| DC | 0.64 | 0.12 | 0.06 | 0.08 | 0.10 |
| DE | 0.44 | 0.14 | 0.09 | 0.12 | 0.21 |
| FL | 0.33 | 0.14 | 0.10 | 0.15 | 0.29 |
| GA | 0.37 | 0.13 | 0.09 | 0.13 | 0.29 |
| HI | 0.38 | 0.16 | 0.10 | 0.14 | 0.22 |
| IA | 0.34 | 0.15 | 0.10 | 0.15 | 0.26 |
| ID | 0.22 | 0.13 | 0.10 | 0.17 | 0.38 |
| IL | 0.44 | 0.14 | 0.09 | 0.12 | 0.21 |
| IN | 0.33 | 0.14 | 0.10 | 0.15 | 0.29 |
| KS | 0.26 | 0.13 | 0.10 | 0.16 | 0.34 |
| KY | 0.31 | 0.14 | 0.10 | 0.15 | 0.30 |
| LA | 0.39 | 0.13 | 0.09 | 0.13 | 0.27 |
| MA | 0.40 | 0.15 | 0.10 | 0.13 | 0.21 |
| MD | 0.52 | 0.13 | 0.08 | 0.10 | 0.17 |
| ME | 0.30 | 0.15 | 0.11 | 0.16 | 0.28 |
| MI | 0.39 | 0.14 | 0.09 | 0.14 | 0.24 |
| MN | 0.37 | 0.15 | 0.10 | 0.14 | 0.23 |
| MO | 0.34 | 0.14 | 0.10 | 0.15 | 0.28 |
| MS | 0.33 | 0.13 | 0.09 | 0.14 | 0.32 |
| MT | 0.27 | 0.15 | 0.11 | 0.16 | 0.32 |
| NC | 0.39 | 0.14 | 0.09 | 0.13 | 0.25 |
| ND | 0.28 | 0.15 | 0.11 | 0.16 | 0.30 |
| NE | 0.24 | 0.13 | 0.10 | 0.16 | 0.37 |
| NH | 0.30 | 0.15 | 0.11 | 0.16 | 0.28 |
| NJ | 0.43 | 0.14 | 0.09 | 0.13 | 0.21 |
| NM | 0.35 | 0.15 | 0.10 | 0.15 | 0.25 |
| NV | 0.31 | 0.14 | 0.10 | 0.15 | 0.30 |
| NY | 0.42 | 0.15 | 0.09 | 0.13 | 0.21 |
| OH | 0.34 | 0.14 | 0.10 | 0.15 | 0.27 |
| OK | 0.30 | 0.14 | 0.10 | 0.15 | 0.31 |
| OR | 0.34 | 0.15 | 0.10 | 0.15 | 0.25 |
| PA | 0.39 | 0.14 | 0.09 | 0.14 | 0.24 |
| RI | 0.36 | 0.15 | 0.10 | 0.14 | 0.24 |
| SC | 0.38 | 0.13 | 0.09 | 0.13 | 0.28 |
| SD | 0.27 | 0.14 | 0.11 | 0.16 | 0.32 |
| TN | 0.34 | 0.13 | 0.09 | 0.14 | 0.29 |
| TX | 0.31 | 0.14 | 0.10 | 0.15 | 0.31 |
| UT | 0.17 | 0.11 | 0.09 | 0.17 | 0.45 |
| VA | 0.37 | 0.14 | 0.09 | 0.14 | 0.27 |
| VT | 0.32 | 0.15 | 0.11 | 0.15 | 0.27 |
| WA | 0.37 | 0.15 | 0.10 | 0.14 | 0.23 |
| WI | 0.35 | 0.15 | 0.10 | 0.14 | 0.26 |
| WV | 0.31 | 0.15 | 0.10 | 0.15 | 0.28 |
| WY | 0.27 | 0.13 | 0.10 | 0.16 | 0.35 |
_NoResponse recorded for connected unitsmsh <- getCurlHandle(httpheader = c(SecurityKey = "ugly-guid-hash-here"))
now <- format(Sys.time(), format = "%Y-%m-%dT%H:%M:%S", tz = "america/chicago")
P <- 1
batchsize = 5000
starttime <- "2012-11-05T15:08:10"
con <- file("electionday.csv", open = "at")
while (P < 1e+05) {
lx <- xmlTreeParse(getForm("https://bdewebservices.com/MsBdeService.svc/rest/ElectionEntriesReport",
curl = msh, startDate = "2012-11-05", endDate = "2012-12-24", pageSize = batchsize,
pageNumber = P), asText = TRUE, useInternalNodes = TRUE)
lx <- try(xmlSApply(lx[["//Entries"]], xmlValue), silent = TRUE)
if (!is.error(lx)) {
writeLines(lx, con = con)
P <- P + 1
} else {
Sys.sleep(5)
message(".")
}
}
{xml} <?xml version="1.0"?> <Report> <Count>6562746</Count> <Entries> <Entry>E88170913273F348124A648081C66117C9B0F0C1,NBCElection_event.e07d0a75-1cb1-4040-9598-1e653d0d1b6c, Romney,11/6/2012 5:55:36 PM</Entry> <Entry>947986E0A42422D56F4751BBDFB5F0AFF865A158,NBCElection_event.e07d0a75-1cb1-4040-9598-1e653d0d1b6c, Obama,11/6/2012 5:55:37 PM</Entry> <Entry>D884FA867C73E1C160CEC514AE923646439C9EE0,NBCElection_event.e07d0a75-1cb1-4040-9598-1e653d0d1b6c,_NoResponse,11/6/2012 5:55:44 PM</Entry> </Entries> </Report>
{xml} <QuestionModel> <ID>c92d9183-4965-41d0-ac2d-98f5316ec6e4</ID> <Question>If you could play Xbox LIVE with one person, who would it be?</Question> <DisplayStreamTime>8843</DisplayStreamTime> <ExpirationStreamTime>8918</ExpirationStreamTime> <Answers> <string> Barack Obama </string> <string> Mitt Romney </string> <string> Brian Williams</string> </Answers> </QuestionModel>
parseQuestions <- function(question.xml) {
sapply(question.xml, function(qs) {
q <- xmlRoot(qs)
wording <- xpathApply(q, "//QuestionModel/Question", xmlValue)
ID <- xpathApply(q, "//QuestionModel/ID", xmlValue)
A <- xpathApply(q, "//Answers")
resp <- lapply(A, function(A) xmlSApply(A, xmlValue))
stubs <- list()
for (i in 1:length(wording)) {
stubs[[i]] <- makeSingleStub(ID[[i]], resp = paste(paste0("`", ID[[i]],
"`"), shQuote(resp[[i]]), sep = "=="), vallab = resp[[i]], wording = wording[[i]])
}
names(stubs) <- unlist(ID)
stubs
})
}
single categoricalStub labeled ‘c92d9183-4965-41d0-ac2d-98f5316ec6e4’
wording : chr "If you could play Xbox LIVE with one person, who would it be?"
Barack Obama "`c92d9183-4965-41d0-ac2d-98f5316ec6e4`=='\tBarack Obama\t'"
Mitt Romney "`c92d9183-4965-41d0-ac2d-98f5316ec6e4`=='\tMitt Romney\t'"
Brian Williams "`c92d9183-4965-41d0-ac2d-98f5316ec6e4`=='\tBrian Williams'"
format :List of 2
$ percent: logi TRUE
$ digits : num 1
Empty slots: ‘collab’ ‘tags’ ‘filtertext’ ‘surveycode’
df <- reshape2::dcast(as.data.frame(df), voter_hash ~ QuestionId, value.var = "Answer",
fun.agg = function(x) levels(x)[x][1])
xtabs() of this view, or, YouGov multidimensional tabReportsR> 100*prop.table(xtabs(~ `c92d9183-4965-41d0-ac2d-98f5316ec6e4`, exclude=4, data=df))
## c92d9183-4965-41d0-ac2d-98f5316ec6e4
## 1 2 3
## 62.83078 19.58565 17.58357
phantomswings
cumulativechange
##' Compute array of transition matrices from vote history
##'
##'
##' @param Mat vote history matrix, as produced by
##' \code{acast(panel, voter_hash ~ wave, value.var="vote2012_self")}
##' @param t numeric (probably vector) time index at which to compute $\Delta$
##' @param window numeric number of columns to consider prior to $t$
##' @return A transition matrix with rows and columns:
##' \tabular{cccc}{
##' \tab Obama \tab Romney \tab \Other \tab Undecided
##' Obama \tab \tab \tab \tab
##' Romney \tab \tab \tab \tab
##' Other \tab \tab \tab \tab
##' Undecided \tab \tab \tab \tab
##' }
##' @export
PresChange <- function(t, Mat, window=5){
j <- c({t-window:1}, t)
cur <- Mat[,t]
Mat <- Mat[,j]
lastvote <- function(v) {
v <- na.omit(v)
ifelse(length(v), v[length(v)], NA)
}
prev <- apply(Mat[,1:window], 1, lastvote)
Mat <- na.omit(cbind(prev,cur))
v <- apply(Mat, 1, paste, collapse="")
label <- c("Obama","Romney","Not Sure","Other")
i <- which(matrix(TRUE,4,4),arr.ind=TRUE)
l <- apply(i, 1, paste, collapse="")
ans <- table(v)
ansmat <- matrix(0, 4,4, dimnames=list(label,label))
ansmat[match(names(ans),l)] <- ans
ans <- prop.table(ansmat, margin=1)
attr(ans,"N") <- sum(ansmat)
cur <- prop.table(table(cur))
names(cur) <- label
attr(ans,"current") <- cur
attr(ans,"uniquevoters") <- unique(rownames(Mat))
ans
}
##' Compute array of transition matrices from vote history
##'
##'
##' @param t numeric (probably vector) time index at which to compute $\Delta$
##' @param Mat vote history matrix, as produced by
##' \code{acast(panel, voter_hash ~ wave, value.var="vote2012_self")}
##' @param cut vector of indices, where to cut periods in the data
##' to condense \code{Mat}
##' @param window numeric number of columns to consider prior to $t$
##' @return A transition matrix with rows and columns:
##' \tabular{cccc}{
##' \tab Obama \tab Romney \tab \Other \tab Undecided
##' Obama \tab \tab \tab \tab
##' Romney \tab \tab \tab \tab
##' Other \tab \tab \tab \tab
##' Undecided \tab \tab \tab \tab
##' }
##' @export
PresChangeWeekly <- function(t, Mat, cut=c("6:14","15:21","22:25","26:28","29:30"), window=1){
cut <- sapply(cut, function(range) eval(parse(text=range)), simplify=FALSE)
## condense the matrix.
pickone <- function(row) {
row <- na.omit(row)
ifelse(length(row)>1,
sample(row,1) ,
ifelse(length(row), row, NA) )
}
CMat <- matrix(NA, nrow=nrow(Mat), ncol=length(cut),
dimnames=list(rownames(Mat), NULL))
for (i in 1:length(cut)){
CMat[,i] <- apply(Mat[,cut[[i]]], 1, pickone)
}
j <- c({t-window:1}, t)
cur <- CMat[,t,drop=FALSE]
Mat <- CMat[,j,drop=FALSE]
lastvote <- function(v) {
v <- na.omit(v)
ifelse(length(v), v[length(v)], NA)
}
prev <- apply(Mat[,1:window,drop=FALSE], 1, lastvote)
Mat <- na.omit(cbind(prev,cur))
v <- apply(Mat, 1, paste, collapse="")
label <- c("Obama","Romney","Not Sure","Other")
i <- which(matrix(TRUE,4,4),arr.ind=TRUE)
l <- apply(i, 1, paste, collapse="")
ans <- table(v)
ansmat <- matrix(0, 4,4, dimnames=list(label,label))
ansmat[match(names(ans),l)] <- ans
ans <- prop.table(ansmat, margin=1)
attr(ans,"N") <- sum(ansmat)
cur <- prop.table(table(cur))
names(cur) <- label
attr(ans,"current") <- cur
attr(ans,"uniquevoters") <- unique(rownames(Mat))
ans
}
load("~/yg/Dropbox/XboxLivePolling/data/panelmatrix.RData")
# translist <- lapply(6:ncol(panel), PresChange, Mat=panel, window=5)
library(lattice)
library(xbox)
debugonce(PresChangeWeekly)
P0 <- lapply(2, PresChangeWeekly, Mat = panel, cut = c("6:14", "15:21"))
load("~/yg/Dropbox/XboxLivePolling/data/panelmatrix.RData")
load("transition-matrices.RData")
colMeans(do.call("rbind", lapply(P, attr, "current")))
## Obama Romney Not Sure Other
## 0.47010 0.45926 0.05004 0.02061
lapply(P, attr, "N")
## $Oct03
## [1] 24422
##
## $Oct11
## [1] 21374
##
## $Oct16
## [1] 18623
##
## $Oct22
## [1] 30077
##
## $Oct30