Data Science Classroom Notes 25/Dec/2021

R Data frame Operations

  • Create an empty data frame
# data.frame() with no arguments creates a data frame with zero rows and
# zero columns.
empty_df <- data.frame()
  • Adding rows to the data frame
# Build each employee as a one-row data frame rather than with c():
# c("Ironman", "Marketing", 40, 107.8) coerces every element to character,
# which would store the hours and pay as strings instead of numbers.
employee_1 <- data.frame(
  name = "Ironman", dept = "Marketing", hours = 40, pay = 107.8,
  stringsAsFactors = FALSE
)
# Binding onto the zero-column data frame simply yields the first row.
employee_df <- rbind(empty_df, employee_1)

employee_2 <- data.frame(
  name = "Thor", dept = "Sales", hours = 40, pay = 100,
  stringsAsFactors = FALSE
)
# rbind() matches columns by name, so both rows must share the same names.
employee_df <- rbind(employee_df, employee_2)
print(employee_df)

# Rename the columns to their descriptive labels (non-syntactic names are
# allowed when assigned through colnames()).
colnames(employee_df) <- c("name", "dept", "hours per week", "hourly pay")
print(employee_df)
  • Structure of the data frame
# str() prints the structure itself and returns NULL invisibly, so wrapping
# it in print() would only add a stray "NULL" line to the output.
str(employee_df)

Preview
  • Add a column to the data frame

# One integer id per existing row; deriving the ids from the row count keeps
# the new column the same length as the data frame.
employee_ids <- seq_len(nrow(employee_df))
# cbind() names the new column after the variable: "employee_ids".
employee_df <- cbind(employee_df, employee_ids)
# str() already prints its report and returns NULL invisibly, so it is
# called directly instead of through print().
str(employee_df)
  • Removing a column from the data frame
# Keep every column except "name" and "dept"; drop = FALSE guarantees the
# result stays a data frame.
new_employee_df <- employee_df[
  , setdiff(names(employee_df), c("name", "dept")),
  drop = FALSE
]
print(new_employee_df)

# Select, by name, every column that is not "dept" or "hours per week".
new_employee_df_2 <- employee_df[
  setdiff(names(employee_df), c("dept", "hours per week"))
]
  • Refer Here for the changes

  • Create a data frame from the CSV file (Refer Here) and sort the data by FIRST_NAME

# Sort the rows by FIRST_NAME, as the exercise asks (the earlier version
# ordered by EMPLOYEE_ID instead).
# NOTE(review): `employees` is assumed to have been loaded from the CSV
# beforehand (e.g. with read.csv()); the load step is not shown here.
employees[order(employees$FIRST_NAME), ]
  • Arrange employee data in the following order
  • EMPLOYEE_ID
  • EMAIL
  • JOB_ID
  • PHONE_NUMBER
  • FIRST_NAME
  • LAST_NAME
  • SALARY
  • MANAGER_ID
  • DEPARTMENT_ID
# Reorder the columns into the full requested layout. All nine columns are
# listed: selecting only the first four would silently drop the rest.
# NOTE(review): assumes the CSV provides every one of these columns.
employees[c(
  "EMPLOYEE_ID", "EMAIL", "JOB_ID", "PHONE_NUMBER", "FIRST_NAME",
  "LAST_NAME", "SALARY", "MANAGER_ID", "DEPARTMENT_ID"
)]
  • Write functions to return the nth highest-paid employee and the top n highest-paid employees
#' Row of the employee with the n-th highest salary
#'
#' @param n Position in the descending salary ranking (1 = highest paid).
#' @param employees Data frame with a `SALARY` column.
#' @return The row(s) of `employees` found at position `n` once the rows are
#'   ordered by `SALARY` from highest to lowest.
nth.higheshpaid <- function(n, employees) {
  salary_rank <- order(employees$SALARY, decreasing = TRUE)
  ranked_employees <- employees[salary_rank, ]
  ranked_employees[n, ]
}

# Third highest-paid employee.
nth.higheshpaid(n = 3, employees = employees)

#' Top n highest-paid employees
#'
#' @param n Number of employees to return; a single non-negative number
#'   (a fractional value is truncated).
#' @param employees Data frame with a `SALARY` column.
#' @return The first `n` rows of `employees` after ordering by `SALARY` from
#'   highest to lowest. Returns zero rows when `n` is 0 and every row when
#'   `n` exceeds the number of rows.
n.highestpaid <- function(n, employees) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  # drop = FALSE keeps a data frame even when `employees` has one column.
  ranked <- employees[
    order(employees$SALARY, decreasing = TRUE), ,
    drop = FALSE
  ]
  # seq_len() instead of 1:n: 1:0 is c(1, 0) and would return the top row
  # for n = 0. Capping at nrow() avoids padding the result with NA rows.
  top_count <- min(floor(n), nrow(ranked))
  ranked[seq_len(top_count), , drop = FALSE]
}

# Five highest-paid employees.
n.highestpaid(n = 5, employees = employees)

Leave a Comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.

About continuous learner

devops & cloud enthusiastic learner