# Convert Python to R
import pandas as pd

# Load the FY2011 IPPS provider summary (top 100 DRGs) into a DataFrame.
costs = pd.read_csv("Inpatient_Prospective_Payment_System__IPPS__Provider_Summary_for_the_Top_100_Diagnosis-Related_Groups__DRG__-_FY2011.csv")

# Give the columns used below short, space-free names.
costs = costs.rename(columns={
    'DRG Definition': 'DRG',
    ' Average Total Payments ': 'Total_Cost',  # Note spaces around ' Average Total Payments '
    ' Total Discharges ': 'Count_Discharges',  # Note spaces around ' Total Discharges '
})

# Split "<code> - <description>" on the FIRST ' - ' only (n=1): a description
# that itself contains ' - ' would otherwise produce extra columns and make
# the two-column assignment fail.
costs[['DRG_Code', 'DRG_Description']] = costs['DRG'].str.split(' - ', n=1, expand=True)  # Note spaces around ' - '
costs = costs.drop(['DRG'], axis=1)

# Per-row difference between a provider's cost and the mean cost of its DRG.
# The string alias 'mean' replaces np.mean, which recent pandas deprecates
# as a transform argument; the result is the same.
# NOTE(review): assumes Total_Cost was parsed as numeric - confirm against the CSV.
costs['Avg_DRG_Cost'] = costs.groupby('DRG_Code')['Total_Cost'].transform('mean')
costs['Cost_Diff'] = costs['Total_Cost'] - costs['Avg_DRG_Cost']

# Total discharges per DRG, largest first.
top_drgs = (
    costs
    .groupby('DRG_Code')['Count_Discharges']
    .sum()
    .sort_values(ascending=False)
)
# DRG code with the most discharges (only displayed when run interactively).
top_drgs.index[0]

# Export the DRG 470 rows for California providers.
# A column name containing a space must be backtick-quoted inside query();
# in plain quotes, 'Provider State' == 'CA' compares two string literals and
# is always False, so the state filter would never match a CA row.
(
    costs
    .query("DRG_Code == '470' and `Provider State` == 'CA'")
    .to_csv("Hip_Replacement_Costs_by_Hosp.csv", index=False)
)
Convert Python to R
import pandas as pd

# Load the FY2011 IPPS provider summary (top 100 DRGs) into a DataFrame.
costs = pd.read_csv("Inpatient_Prospective_Payment_System__IPPS__Provider_Summary_for_the_Top_100_Diagnosis-Related_Groups__DRG__-_FY2011.csv")

# Give the columns used below short, space-free names.
costs = costs.rename(columns={
    'DRG Definition': 'DRG',
    ' Average Total Payments ': 'Total_Cost',  # Note spaces around ' Average Total Payments '
    ' Total Discharges ': 'Count_Discharges',  # Note spaces around ' Total Discharges '
})

# Split "<code> - <description>" on the FIRST ' - ' only (n=1): a description
# that itself contains ' - ' would otherwise produce extra columns and make
# the two-column assignment fail.
costs[['DRG_Code', 'DRG_Description']] = costs['DRG'].str.split(' - ', n=1, expand=True)  # Note spaces around ' - '
costs = costs.drop(['DRG'], axis=1)

# Per-row difference between a provider's cost and the mean cost of its DRG.
# The string alias 'mean' replaces np.mean, which recent pandas deprecates
# as a transform argument; the result is the same.
# NOTE(review): assumes Total_Cost was parsed as numeric - confirm against the CSV.
costs['Avg_DRG_Cost'] = costs.groupby('DRG_Code')['Total_Cost'].transform('mean')
costs['Cost_Diff'] = costs['Total_Cost'] - costs['Avg_DRG_Cost']

# Total discharges per DRG, largest first.
top_drgs = (
    costs
    .groupby('DRG_Code')['Count_Discharges']
    .sum()
    .sort_values(ascending=False)
)
# DRG code with the most discharges (only displayed when run interactively).
top_drgs.index[0]

# Export the DRG 470 rows for California providers.
# A column name containing a space must be backtick-quoted inside query();
# in plain quotes, 'Provider State' == 'CA' compares two string literals and
# is always False, so the state filter would never match a CA row.
(
    costs
    .query("DRG_Code == '470' and `Provider State` == 'CA'")
    .to_csv("Hip_Replacement_Costs_by_Hosp.csv", index=False)
)

Trending now
This is a popular solution!
Step by step
Solved in 2 steps









