Skip to content

Commit

Permalink
fixes @summary (#124)
Browse files Browse the repository at this point in the history
* fixes `@summary`

* Added support for non-numeric columns, minor tweaks to column names.

---------

Co-authored-by: Karandeep Singh <karandeep@gmail.com>
  • Loading branch information
drizk1 and kdpsingh authored Dec 28, 2024
1 parent 70b35d4 commit afdc188
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 14 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# TidierData.jl updates

## v0.16.3
- Bugfix: `@summary` no longer errors on non-numeric columns. For those columns, it now reports only the statistics that do not require numeric data (counts of non-missing, missing, total, and unique values) and leaves the numeric statistics empty. Summary column names are now lowercase snake_case.

## v0.16.2 - 2024-09-03
- Bugfix: `@slice_min` and `@slice_max` respect the `n` argument
- Adds `@head`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierData"
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
authors = ["Karandeep Singh"]
version = "0.16.2"
version = "0.16.3"

[deps]
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
Expand Down
3 changes: 2 additions & 1 deletion src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2415,7 +2415,8 @@ For numerical columns, returns a dataframe with the Q1,Q3, min, max, mean, media
julia> df = DataFrame(a = [1, 2, 3, 4, 5],
b = [missing, 7, 8, 9, 10],
c = [11, missing, 13, 14, missing],
d = [16, 17, 18, 19, 20]);
d = [16.1, 17.2, 18.3, 19.4, 20.5],
e = ["a", "a", "a", "a", "a"]);
julia> @summary(df);
Expand Down
43 changes: 31 additions & 12 deletions src/summary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,37 @@ function summary_stats(df::DataFrame)
summary_data = []
for column in colnames
col = df[:, column]
col_nonmissing = collect(skipmissing(col))
push!(summary_data, (
Column = column,
Min = minimum(col_nonmissing),
Q1 = quantile(col_nonmissing, 0.25),
Median = median(col_nonmissing),
Mean = mean(col_nonmissing),
Q3 = quantile(col_nonmissing, 0.75),
Max = maximum(col_nonmissing),
Count = length(col_nonmissing),
Missing_Count = count(ismissing, col)
))
if eltype(col) <: Union{Number, Missing}
col_nonmissing = collect(skipmissing(col))
push!(summary_data, (
column = column,
min = minimum(col_nonmissing),
q1 = quantile(col_nonmissing, 0.25),
median = median(col_nonmissing),
mean = mean(col_nonmissing),
q3 = quantile(col_nonmissing, 0.75),
max = maximum(col_nonmissing),
non_missing_values = length(col_nonmissing),
missing_values = count(ismissing, col),
total_values = length(col),
unique_values = length(unique(col_nonmissing))
))
else
col_nonmissing = collect(skipmissing(col))
push!(summary_data, (
column = column,
min = nothing,
q1 = nothing,
median = nothing,
mean = nothing,
q3 = nothing,
max = nothing,
non_missing_values = length(col_nonmissing),
missing_values = count(ismissing, col),
total_values = length(col),
unique_values = length(unique(col_nonmissing))
))
end
end
return DataFrame(summary_data)
end
Expand Down

0 comments on commit afdc188

Please sign in to comment.