Title: | Fit Univariate Distributions |
---|---|
Description: | Wrapper for computing parameters for univariate distributions using MLE. It creates an object that stores d, p, q, r functions as well as parameters and statistics for diagnostics. Currently supports automated fitting of distributions from the base and actuar packages. A manual fitting function is included to support directly specifying parameters for any distribution from ancillary packages. |
Authors: | Thomas Roh [aut, cre] |
Maintainer: | Thomas Roh <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.6.2 |
Built: | 2025-02-15 05:41:25 UTC |
Source: | https://github.com/tomroh/fitur |
A wrapper for building function families given a numeric vector and the distribution
build_dist(x, distribution)
build_dist(x, distribution)
x |
numeric vector |
distribution |
distribution character name |
list of distribution functions for d, p, q, r, and parameters
fittedDists <- build_dist(rpois(100,5), 'pois') dpois(x = 5, lambda = 5) fittedDists$dpois(5) ppois(5, 5) fittedDists$ppois(5) qpois(.5, 5) fittedDists$qpois(.5) set.seed(8257) rpois(100, 5) set.seed(8257) fittedDists$rpois(100) fittedDists$parameters
fittedDists <- build_dist(rpois(100,5), 'pois') dpois(x = 5, lambda = 5) fittedDists$dpois(5) ppois(5, 5) fittedDists$ppois(5) qpois(.5, 5) fittedDists$qpois(.5) set.seed(8257) rpois(100, 5) set.seed(8257) fittedDists$rpois(100) fittedDists$parameters
Calculate moments of a numeric vector
calc_moments(x)
calc_moments(x)
x |
a numeric vector |
a named vector of descriptive statistics
x <- rexp(1000, 2) calc_moments(x)
x <- rexp(1000, 2) calc_moments(x)
The Discrete Uniform Distribution
ddunif(x, min = 0, max = 1) pdunif(q, min = 0, max = 1) qdunif(p, min = 0, max = 1) rdunif(n, min = 0L, max = 1)
ddunif(x, min = 0, max = 1) pdunif(q, min = 0, max = 1) qdunif(p, min = 0, max = 1) rdunif(n, min = 0L, max = 1)
x |
vector of (non-negative integer) quantiles |
min |
minimum value of distribution (integer) |
max |
maximum value of distribution (integer) |
q |
vector of quantiles |
p |
vector of probabilities |
n |
number of random values to return |
ddunif gives the density, pdunif gives the distribution function, qdunif gives the quantile function, rdunif generates random deviates
ddunif(0:1) pdunif(1) qdunif(.5) rdunif(10)
ddunif(0:1) pdunif(1) qdunif(.5) rdunif(10)
Interactively submit a numeric vector and choose which distributions you want to run fit diagnostics on. Click Done to have the code for the desired distribution inserted at your cursor position.
fit_dist_addin()
fit_dist_addin()
Fit Empirical Distribution
fit_empirical(x)
fit_empirical(x)
x |
integer or double vector |
If x is an integer vector, a list of family functions for d, p, q, r, and parameters based on each integer value. If x is a double vector, a list of family functions for d, p, q, r, and parameters based on the Freedman-Diaconis rule for the optimal number of histogram bins.
set.seed(562) x <- rpois(100, 5) empDis <- fit_empirical(x) # probability density function plot(empDis$dempDis(0:10), xlab = 'x', ylab = 'dempDis') # cumulative distribution function plot(x = 0:10, y = empDis$pempDis(0:10), #type = 'l', xlab = 'x', ylab = 'pempDis') # quantile function plot(x = seq(.1, 1, .1), y = empDis$qempDis(seq(.1, 1, .1)), type = 'p', xlab = 'x', ylab = 'qempDis') # random sample from fitted distribution summary(empDis$r(100)) empDis$parameters set.seed(562) x <- rexp(100, 1/5) empCont <- fit_empirical(x) # probability density function plot(x = 0:10, y = empCont$dempCont(0:10), xlab = 'x', ylab = 'dempCont') # cumulative distribution function plot(x = 0:10, y = empCont$pempCont(0:10), #type = 'l', xlab = 'x', ylab = 'pempCont') # quantile function plot(x = seq(.5, 1, .1), y = empCont$qempCont(seq(.5, 1, .1)), type = 'p', xlab = 'x', ylab = 'qempCont') # random sample from fitted distribution summary(empCont$r(100)) empCont$parameters
set.seed(562) x <- rpois(100, 5) empDis <- fit_empirical(x) # probability density function plot(empDis$dempDis(0:10), xlab = 'x', ylab = 'dempDis') # cumulative distribution function plot(x = 0:10, y = empDis$pempDis(0:10), #type = 'l', xlab = 'x', ylab = 'pempDis') # quantile function plot(x = seq(.1, 1, .1), y = empDis$qempDis(seq(.1, 1, .1)), type = 'p', xlab = 'x', ylab = 'qempDis') # random sample from fitted distribution summary(empDis$r(100)) empDis$parameters set.seed(562) x <- rexp(100, 1/5) empCont <- fit_empirical(x) # probability density function plot(x = 0:10, y = empCont$dempCont(0:10), xlab = 'x', ylab = 'dempCont') # cumulative distribution function plot(x = 0:10, y = empCont$pempCont(0:10), #type = 'l', xlab = 'x', ylab = 'pempCont') # quantile function plot(x = seq(.5, 1, .1), y = empCont$qempCont(seq(.5, 1, .1)), type = 'p', xlab = 'x', ylab = 'qempCont') # random sample from fitted distribution summary(empCont$r(100)) empCont$parameters
Fit Univariate Distribution
fit_univariate(x, distribution, type = "continuous")
fit_univariate(x, distribution, type = "continuous")
x |
numeric vector |
distribution |
character name of distribution |
type |
discrete or continuous data |
a fitted list object of d, p, q, r distribution functions and parameters, MLE for probability distributions, custom fit for empirical
# Fit Discrete Distribution set.seed(42) x <- rpois(1000, 3) fitted <- fit_univariate(x, 'pois', type = 'discrete') # density function plot(fitted$dpois(x=0:10), xlab = 'x', ylab = 'dpois') # distribution function plot(fitted$ppois(seq(0, 10, 1)), xlab= 'x', ylab = 'ppois') # quantile function plot(fitted$qpois, xlab= 'x', ylab = 'qpois') # sample from theoretical distribution summary(fitted$rpois(100)) # estimated parameters from MLE fitted$parameters set.seed(24) x <- rweibull(1000, shape = .5, scale = 2) fitted <- fit_univariate(x, 'weibull') # density function plot(fitted$dweibull, xlab = 'x', ylab = 'dweibull') # distribution function plot(fitted$pweibull, xlab = 'x', ylab = 'pweibull') # quantile function plot(fitted$qweibull, xlab = 'x', ylab = 'qweibull') # sample from theoretical distribution summary(fitted$rweibull(100)) # estimated parameters from MLE fitted$parameters
# Fit Discrete Distribution set.seed(42) x <- rpois(1000, 3) fitted <- fit_univariate(x, 'pois', type = 'discrete') # density function plot(fitted$dpois(x=0:10), xlab = 'x', ylab = 'dpois') # distribution function plot(fitted$ppois(seq(0, 10, 1)), xlab= 'x', ylab = 'ppois') # quantile function plot(fitted$qpois, xlab= 'x', ylab = 'qpois') # sample from theoretical distribution summary(fitted$rpois(100)) # estimated parameters from MLE fitted$parameters set.seed(24) x <- rweibull(1000, shape = .5, scale = 2) fitted <- fit_univariate(x, 'weibull') # density function plot(fitted$dweibull, xlab = 'x', ylab = 'dweibull') # distribution function plot(fitted$pweibull, xlab = 'x', ylab = 'pweibull') # quantile function plot(fitted$qweibull, xlab = 'x', ylab = 'qweibull') # sample from theoretical distribution summary(fitted$rweibull(100)) # estimated parameters from MLE fitted$parameters
Fit Univariate Distributions by Specifying Parameters
fit_univariate_man(distribution, parameters)
fit_univariate_man(distribution, parameters)
distribution |
distribution character name |
parameters |
named vector of parameters to set |
list of distribution functions for d, p, q, r, and parameters
manFun <- fit_univariate_man('norm', c(mean = 2, sd = 5)) set.seed(5) m1 <- mean(manFun$rnorm(100000)) set.seed(5) m2 <- mean(rnorm(100000, 2, 5)) identical(m1, m2)
manFun <- fit_univariate_man('norm', c(mean = 2, sd = 5)) set.seed(5) m1 <- mean(manFun$rnorm(100000)) set.seed(5) m2 <- mean(rnorm(100000, 2, 5)) identical(m1, m2)
Generate Single Distribution Function
gen_dist_fun(f, parameters, ...)
gen_dist_fun(f, parameters, ...)
f |
one of distribution functions |
parameters |
new parameters for distribution |
... |
arguments to pass on to distribution function |
one of parameterized distribution functions in d, p, q, r
Apply all goodness of fit tests and return a data.frame with the results
gof_tests(fits, x)
gof_tests(fits, x)
fits |
a list object produced from fit_univariate, fit_empirical, or fit_univariate_man |
x |
numeric vector of sample data |
a data.frame of test statistic results for each distribution
set.seed(84) x <- rgamma(100, 1, 1) dists <- c('gamma', 'lnorm', 'weibull') multipleFits <- lapply(dists, fit_univariate, x = x) gof_tests(multipleFits, x)
set.seed(84) x <- rgamma(100, 1, 1) dists <- c('gamma', 'lnorm', 'weibull') multipleFits <- lapply(dists, fit_univariate, x = x) gof_tests(multipleFits, x)
Wrappers to compute goodness of fit tests from distfun objects
ks_test(distfun, x, ...) ## S3 method for class 'distfun' ad_test(distfun, x) ad_test(distfun, x) ## S3 method for class 'distfun' cvm_test(distfun, x) cvm_test(distfun, x)
ks_test(distfun, x, ...) ## S3 method for class 'distfun' ad_test(distfun, x) ad_test(distfun, x) ## S3 method for class 'distfun' cvm_test(distfun, x) cvm_test(distfun, x)
distfun |
a distfun object |
x |
numeric vector |
... |
arguments to be passed on to test function |
goodness of fit object
x <- rgamma(100, 1, 1) fit <- fit_univariate(x, 'gamma') ks_test(fit, x) ad_test(fit, x) cvm_test(fit, x)
x <- rgamma(100, 1, 1) fit <- fit_univariate(x, 'gamma') ks_test(fit, x) ad_test(fit, x) cvm_test(fit, x)
Test if object is a distfun object
is.distfun(x)
is.distfun(x)
x |
an R object to be tested |
TRUE if x is a distfun object, FALSE otherwise
Density Comparison Plot
plot_density(x, fits, nbins)
plot_density(x, fits, nbins)
x |
numeric vector of sample data |
fits |
a list object produced from fit_univariate, fit_empirical, or fit_univariate_man |
nbins |
number of bins for histogram |
ggplot of empirical histogram of x compared to theoretical density distributions
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_density(x, fits, 30) + theme_bw()
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_density(x, fits, 30) + theme_bw()
P-P Plot
plot_pp(x, fits)
plot_pp(x, fits)
x |
numeric vector of sample data |
fits |
a list object produced from fit_univariate, fit_empirical, or fit_univariate_man |
ggplot of percentile-percentile comparison of theoretical distribution
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_pp(x, fits) + theme_bw()
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_pp(x, fits) + theme_bw()
Q-Q Plot
plot_qq(x, fits)
plot_qq(x, fits)
x |
numeric vector of sample data |
fits |
a list object produced from fit_univariate, fit_empirical, or fit_univariate_man |
ggplot of quantile-quantile comparison of theoretical distribution
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_qq(x, fits) + theme_bw()
library(ggplot2) set.seed(37) x <- rgamma(10000, 5) dists <- c('gamma', 'lnorm', 'weibull') fits <- lapply(dists, fit_univariate, x = x) plot_qq(x, fits) + theme_bw()