tidypolars.funs

Module Contents

Functions

across(cols[, fn, names_prefix])

Apply a function across a selection of columns

as_boolean(x)

Convert to a boolean

as_float(x)

Convert to float. Defaults to Float64.

as_integer(x)

Convert to integer. Defaults to Int64.

as_string(x)

Convert to string. Defaults to Utf8.

abs(x)

Absolute value

between(x, left, right)

Test if values of a column are between two values

case_when(expr)

Case when

cast(x, dtype)

General type conversion.

coalesce(*args)

Coalesce missing values

cor(x, y[, method])

Find the correlation of two columns

cov(x, y)

Find the covariance of two columns

count(x)

Number of observations in each group

first(x)

Get first value

floor(x)

Round numbers down to the lower integer

if_else(condition, true, false)

If Else

is_finite(x)

Test if values of a column are finite

is_in(x, y)

Test if values of a column are in a list of values

is_infinite(x)

Test if values of a column are infinite

is_not(x)

Flip values of a boolean series

is_nan(x)

Test if values of a column are nan

is_not_in(x, y)

Test if values of a column are not in a list of values

is_not_null(x)

Test if values of a column are not null

is_null(x)

Test if values of a column are null

lag(x[, n, default])

Get lagging values

last(x)

Get last value

lead(x[, n, default])

Get leading values

length(x)

Number of observations in each group

log(x)

Compute the natural logarithm of a column

log10(x)

Compute the base 10 logarithm of a column

max(x)

Get column max

mean(x)

Get column mean

median(x)

Get column median

min(x)

Get column minimum

n()

Number of observations in each group

n_distinct(x)

Get number of distinct values in a column

quantile(x[, quantile])

Get the specified quantile of a column

read_csv(file, *args, **kwargs)

Simple wrapper around polars.read_csv

read_parquet(source, *args, **kwargs)

Simple wrapper around polars.read_parquet

rep(x[, times])

Replicate the values in x

replace_null(x[, replace])

Replace null values

round(x[, decimals])

Round the values of a column to a given number of decimals

row_number()

Return row number

sd(x)

Get column standard deviation

sqrt(x)

Get column square root

sum(x)

Get column sum

var(x)

Get column variance

across(cols, fn=lambda x: ..., names_prefix=None)[source]

Apply a function across a selection of columns

Parameters:
  • cols (list) – Columns to operate on

  • fn (lambda) – A function or lambda to apply to each column

  • names_prefix (Optional - str) – Prefix to prepend to the names of changed columns

Examples

>>> df = tp.Tibble(x = ['a', 'a', 'b'], y = range(3), z = range(3))
>>> df.mutate(across(['y', 'z'], lambda x: x * 2))
>>> df.mutate(across(tp.Int64, lambda x: x * 2, names_prefix = "double_"))
>>> df.summarize(across(['y', 'z'], tp.mean), by = 'x')
as_boolean(x)[source]

Convert to a boolean

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.mutate(bool_x = tp.as_boolean(col('x')))
as_float(x)[source]

Convert to float. Defaults to Float64.

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.mutate(float_x = tp.as_float(col('x')))
as_integer(x)[source]

Convert to integer. Defaults to Int64.

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.mutate(int_x = tp.as_integer(col('x')))
as_string(x)[source]

Convert to string. Defaults to Utf8.

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.mutate(string_x = tp.as_string(col('x')))
abs(x)[source]

Absolute value

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.mutate(abs_x = tp.abs('x'))
>>> df.mutate(abs_x = tp.abs(col('x')))
between(x, left, right)[source]

Test if values of a column are between two values

Parameters:
  • x (Expr, Series) – Column to operate on

  • left (int) – Value to test if column is greater than or equal to

  • right (int) – Value to test if column is less than or equal to

Examples

>>> df = tp.Tibble(x = range(4))
>>> df.filter(tp.between(col('x'), 1, 3))
case_when(expr)[source]

Case when

Parameters:

expr (Expr) – A logical expression

Examples

>>> df = tp.Tibble(x = range(1, 4))
>>> df.mutate(
>>>    case_x = tp.case_when(col('x') < 2).then(1)
>>>             .when(col('x') < 3).then(2)
>>>             .otherwise(0)
>>> )
cast(x, dtype)[source]

General type conversion.

Parameters:
  • x (Expr, Series) – Column to operate on

  • dtype (DataType) – Type to convert to

Examples

>>> df.mutate(float_x = tp.cast(col('x'), tp.Float64))
coalesce(*args)[source]

Coalesce missing values

Parameters:

args (Expr) – Columns to coalesce

Examples

>>> df.mutate(coalesce_xy = tp.coalesce(col('x'), col('y')))
cor(x, y, method='pearson')[source]

Find the correlation of two columns

Parameters:
  • x (Expr) – A column

  • y (Expr) – A column

  • method (str) – Type of correlation to find. Either ‘pearson’ or ‘spearman’.

Examples

>>> df.summarize(cor = tp.cor(col('x'), col('y')))
cov(x, y)[source]

Find the covariance of two columns

Parameters:
  • x (Expr) – A column

  • y (Expr) – A column

Examples

>>> df.summarize(cov = tp.cov(col('x'), col('y')))
count(x)[source]

Number of observations in each group

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(count = tp.count(col('x')))
first(x)[source]

Get first value

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(first_x = tp.first('x'))
>>> df.summarize(first_x = tp.first(col('x')))
floor(x)[source]

Round numbers down to the lower integer

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.mutate(floor_x = tp.floor(col('x')))
if_else(condition, true, false)[source]

If Else

Parameters:
  • condition (Expr) – A logical expression

  • true – Value if the condition is true

  • false – Value if the condition is false

Examples

>>> df = tp.Tibble(x = range(1, 4))
>>> df.mutate(if_x = tp.if_else(col('x') < 2, 1, 2))
is_finite(x)[source]

Test if values of a column are finite

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = [1.0, float('inf')])
>>> df.filter(tp.is_finite(col('x')))
is_in(x, y)[source]

Test if values of a column are in a list of values

Parameters:
  • x (Expr, Series) – Column to operate on

  • y (list) – List to test against

Examples

>>> df = tp.Tibble(x = range(3))
>>> df.filter(tp.is_in(col('x'), [1, 2]))
is_infinite(x)[source]

Test if values of a column are infinite

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = [1.0, float('inf')])
>>> df.filter(tp.is_infinite(col('x')))
is_not(x)[source]

Flip values of a boolean series

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = range(3))
>>> df.filter(tp.is_not(col('x') < 2))
is_nan(x)[source]

Test if values of a column are nan

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = [1.0, float('nan')])
>>> df.filter(tp.is_nan(col('x')))
is_not_in(x, y)[source]

Test if values of a column are not in a list of values

Parameters:
  • x (Expr, Series) – Column to operate on

  • y (list) – List to test against

Examples

>>> df = tp.Tibble(x = range(3))
>>> df.filter(tp.is_not_in(col('x'), [1, 2]))
is_not_null(x)[source]

Test if values of a column are not null

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = range(3))
>>> df.filter(tp.is_not_null(col('x')))
is_null(x)[source]

Test if values of a column are null

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df = tp.Tibble(x = range(3))
>>> df.filter(tp.is_null(col('x')))
lag(x, n: int = 1, default=None)[source]

Get lagging values

Parameters:
  • x (Expr, Series) – Column to operate on

  • n (int) – Number of positions to lag by

  • default (optional) – Value to fill in missing values

Examples

>>> df.mutate(lag_x = tp.lag(col('x')))
>>> df.mutate(lag_x = tp.lag('x'))
last(x)[source]

Get last value

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(last_x = tp.last('x'))
>>> df.summarize(last_x = tp.last(col('x')))
lead(x, n: int = 1, default=None)[source]

Get leading values

Parameters:
  • x (Expr, Series) – Column to operate on

  • n (int) – Number of positions to lead by

  • default (optional) – Value to fill in missing values

Examples

>>> df.mutate(lead_x = tp.lead(col('x')))
>>> df.mutate(lead_x = col('x').lead())
length(x)[source]

Number of observations in each group

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(length = tp.length(col('x')))
log(x)[source]

Compute the natural logarithm of a column

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.mutate(log = tp.log('x'))
log10(x)[source]

Compute the base 10 logarithm of a column

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.mutate(log = tp.log10('x'))
max(x)[source]

Get column max

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(max_x = tp.max('x'))
>>> df.summarize(max_x = tp.max(col('x')))
mean(x)[source]

Get column mean

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(mean_x = tp.mean('x'))
>>> df.summarize(mean_x = tp.mean(col('x')))
median(x)[source]

Get column median

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(median_x = tp.median('x'))
>>> df.summarize(median_x = tp.median(col('x')))
min(x)[source]

Get column minimum

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(min_x = tp.min('x'))
>>> df.summarize(min_x = tp.min(col('x')))
n()[source]

Number of observations in each group

Examples

>>> df.summarize(count = tp.n())
n_distinct(x)[source]

Get number of distinct values in a column

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(n_distinct_x = tp.n_distinct('x'))
>>> df.summarize(n_distinct_x = tp.n_distinct(col('x')))
quantile(x, quantile=0.5)[source]

Get the specified quantile of a column

Parameters:
  • x (Expr, Series) – Column to operate on

  • quantile (float) – Quantile to return

Examples

>>> df.summarize(quantile_x = tp.quantile('x', .25))
read_csv(file: str, *args, **kwargs)[source]

Simple wrapper around polars.read_csv

read_parquet(source: str, *args, **kwargs)[source]

Simple wrapper around polars.read_parquet

rep(x, times=1)[source]

Replicate the values in x

Parameters:
  • x (const, Series) – Value or Series to repeat

  • times (int) – Number of times to repeat

Examples

>>> tp.rep(1, 3)
>>> tp.rep(pl.Series(range(3)), 3)
replace_null(x, replace=None)[source]

Replace null values

Parameters:
  • x (Expr, Series) – Column to operate on

  • replace – Value to replace null values with

Examples

>>> df = tp.Tibble(x = [0, None], y = [None, None])
>>> df.mutate(x = tp.replace_null(col('x'), 1))
round(x, decimals=0)[source]

Round the values of a column to a given number of decimals

Parameters:
  • x (Expr, Series) – Column to operate on

  • decimals (int) – Decimals to round to

Examples

>>> df.mutate(x = tp.round(col('x')))
row_number()[source]

Return row number

Examples

>>> df.mutate(row_num = tp.row_number())
sd(x)[source]

Get column standard deviation

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(sd_x = tp.sd('x'))
>>> df.summarize(sd_x = tp.sd(col('x')))
sqrt(x)[source]

Get column square root

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.mutate(sqrt_x = tp.sqrt('x'))
sum(x)[source]

Get column sum

Parameters:

x (Expr, Series) – Column to operate on

Examples

>>> df.summarize(sum_x = tp.sum('x'))
>>> df.summarize(sum_x = tp.sum(col('x')))
var(x)[source]

Get column variance

Parameters:

x (Expr) – Column to operate on

Examples

>>> df.summarize(var_x = tp.var('x'))
>>> df.summarize(var_x = tp.var(col('x')))