
/*------------------------------------------------------------------*
 | MACRO NAME: auc_ae
 | SHORT DESC: Using AUC analysis to compare individual/overall adverse events
 |             between treatment groups over time
 *------------------------------------------------------------------*
 | CREATED BY: ToxT Team                                (07/20/2016)
 |
 | Version 2 (4/27/2018): Sort the _wcytox_tmp dataset for merging purpose
 |                        Completed - 1/7/2019
 *------------------------------------------------------------------*
 | PURPOSE
 |
 | This program uses AUC analysis to compare individual or overall adverse events
 | per NCI Common Terminology Criteria for Adverse Events (CTCAE) or
 | individual symptoms/QOL items reported per Patient Reported Outcomes (PRO)
 | for each treatment group over data collection time points.
 | This macro also uses the AUC macro, table macro and tablemrg macro
 | written by Paul Novotny to perform the AUC calculation, to display the summary
 | statistics of the data analysis and to organize the summary tables.
 |
 |
 | The program will produce two documents in the directory where SAS is run:
 |
 | For CTCAE:
 | 1. auc_ae_figures_ae_UnadjustedAUC.doc OR auc_ae_figures_ae_ProratedAUC.doc:
 |    Area under the curve (AUC) graph summarizing AE profiles over the entire
 |    course of a study by treatment group
 |    (User can choose to either calculate the Unadjusted AUC or Prorated AUC)
 |
 | 2. auc_ae_tbl_ae_UnadjustedAUC.doc OR auc_ae_tbl_ae_ProratedAUC.doc
 |    Table Summary of AUC, maximum grade (analyzed as continuous or categorical
 |    data), distribution of any adverse event or any grade 3 adverse event
 |    of individual or overall adverse event by treatment group at each timepoint
 |    (User can choose to either calculate the Unadjusted AUC or Prorated AUC)
 |
 | NOTES: Overall AE is summarized across all AEs that are included
 |        in the CYTOX dataset. If the user would like to see the Overall AE
 |        across a subset of AEs that are specified in the TOXICITY_LIST
 |        parameter, the CYTOX dataset needs to be modified to include
 |        only the subset of AEs that are of interest to the user
 |
 | For PRO Symptoms/QOL items:
 | 1. auc_ae_figures_qol_UnadjustedAUC.doc OR auc_ae_figures_qol_ProratedAUC.doc:
 |    Area under the curve (AUC) graph summarizing individual symptom/ QOL profiles
 |    over the entire course of a study by treatment group
 |    (User can choose to either calculate the Unadjusted AUC or Prorated AUC)
 |
 | 2. auc_ae_tbl_qol_UnadjustedAUC.doc OR auc_ae_tbl_qol_ProratedAUC.doc:
 |    Two sets of Table Summary of AUC, maximum scores and maximum scores categories
 |    of individual symptom/ QOL items by treatment group at each timepoint
 |    per two ways of AUC calculation when baseline data is missing:
 |    a) if baseline is missing, the AUC is calculated using known data
 |    b) if a patient has missing baseline, that patient is not used in
 |        calculating the AUC
 |
 | ~ IMPORTANT NOTES: ~
 | 1. The AUC graphs will be produced in black/white version ONLY
 | 2. Datasets that are used to produce the AUC graph and summary tables
 |    can be found in WORK directory with prefix of f_ in dataset name
 |    where user can further modify or tweak the graphing per user preference
 |
 *------------------------------------------------------------------*
 | REQUIRED DATASETS and DATA FIELDS
 |
 | 1. crse dataset with one observation per patient with the following data fields:
 |    dcntr_id          : patient identifier
 |    evalae            : evaluable for toxicity (numeric)
 |                        (1= at least one AE form is entered other than
 |                            the baseline timepoint
 |                         0= otherwise)
 |    [comparison group]: group to be used for data comparison
 |                        which will be specified in BY macro parameter
 |                        (i.e. arm)
 |
 | 2. protdata dataset with one observation per time point per patient with the
 |    following data fields (REQUIRED for CTCAE analysis ONLY):
 |    dcntr_id          : patient identifier
 |    [timepoint]       : timepoint for treatment (numeric) which will be specified
 |                        in TIMEPOINT macro parameter (i.e. cycle)
 |    Notes: The dataset should include all treatment time points that are
 |            of user interest for the analysis
 |
 | 3. cytox dataset with one observation per maximum grade per adverse event per
 |    timepoint per patient with the following data fields (REQUIRED for CTCAE analysis ONLY):
 |    dcntr_id          : patient identifier
 |    [timepoint]       : timepoint for adverse events reporting (numeric) which
 |                        will be specified in TIMEPOINT macro parameter (i.e. cycle)
 |    toxicity          : Adverse event as in MedDRA codes (numeric and should be formatted
 |                        to give adverse event description) (i.e. 900182)
 |    grade             : Severity of the adverse event according to CTC guidelines.
 |                        (numeric) (i.e. 0, 1, 2, 3, 4, 5)
 |    Notes: The dataset should include all adverse events reported up to the last time point
 |           indicated in the protdata dataset that is of user interest for the analysis
 |
 | 4. qol dataset with one observation per symptom per timepoint per patient
 |    with the following data fields (REQUIRED for PRO Symptoms analysis ONLY):
 |    dcntr_id          : patient identifier
 |    [timepoint]       : timepoint for symptoms reporting (numeric) which
 |                        will be specified in TIMEPOINT macro parameter (i.e. cycle)
 |    qol               : SAS variable name for each symptom (character)
 |                        (i.e. p_q10, poms04)
 |    _label_           : Description/Label for each symptom (character)
 |                        (i.e. Overall QOL, Sleep Interference, Nervousness)
 |    scores            : Symptoms/QOL items scores in a scale of 0 to 100 (numeric)
 |                        with 0=Low Quality of Life and 100=Best Quality of Life
 |                        Notes: for symptoms with 0=no symptom and 100=worst symptom
 |                               the scores need to be reversed before using this program
 |    Notes: The dataset should include all symptoms/qol items reported up to the
 |           last time point of user interest for the analysis
 *------------------------------------------------------------------*
 | REQUIRED PARAMETERS
 |
 | datasource   : The location where analysis datasets are saved
 |                1 = Internal CCS database
 |                2 = User provided datasets in WORK directory (DEFAULT)
 |
 | datatype     : Type of adverse events/symptoms reported
 |                1 = Adverse Events per CTCAE (DEFAULT)
 |                2 = Symptoms/QOL items per PRO
 |
 | by           : Group Comparison (i.e. arm)
 |
 | timepoint    : Time variable (numeric)
 |
 | toxicity_list: List of toxicity codes separated by space for CTCAE
 |                (i.e. 800001 832222 1002222)
 |                OR List of SAS variable names separated by space for PRO
 |                symptoms/QOL items (i.e. lasa01 poms03 lasa09 lccs4)
 |                Notes: This allows up to 8 AEs/Symptoms/QOL items

 | OPTIONAL PARAMETERS
 |
 | studynum     : study reference number (REQUIRED if datasource=1) (i.e. MCXXXX)
 |
 | incmiss      : AE grade assumption when specified AE entry is not available (for CTCAE ONLY)
 |                y = assumed grade 0 if there is no entry for user specified AE in the data set,
 |                    with condition that data entry is available for other AE at specific
 |                    timepoint (DEFAULT)
 |                n = assumed missing or do nothing if there is no entry for user specified
 |                    AE in the dataset
 | rescale      : Enter the number that you would like to rescale by. (numeric)
 |                0                   = calculate the AUC over all available time points (DEFAULT)
 |                                      (which is the unadjusted AUC)
 |                User specify number = The AUC will be divided by the number
 |                                      of time intervals with known values
 |                                      (to get an average AUC per time interval)
 |                                      and then will be multiplied by this parameter
 |                                      (which is the expected number of intervals).
 |                                      This is an adjustment to account for subjects
 |                                      with different numbers of evaluations.
 |
 *------------------------------------------------------------------*
 | SAMPLE CODES USED to prepare the QOL dataset
 |
 | For example:
 | QOL_tmp dataset contains one observation per timepoint with data fields include:
 | dcntr_id, visit, p_q01, p_q07, p_q09, p_q11
 |
 | proc sort data=qol_tmp;
 |    by dcntr_id visit;
 | run;
 |
 | proc transpose data=qol_tmp out=qol (rename=(col1=scores));
 |    by dcntr_id visit;
 |    var p_q01 p_q07 p_q09 p_q11;
 | run;
 |
 | data qol;
 |    set qol;
 |    rename _name_=qol;
 |    label _name_='QOL';
 | run;
 |
 *------------------------------------------------------------------*
 | EXAMPLE:
 |
 | For CTCAE:
 |    %auc_ae (by=arm, timepoint=cycle,
 |             toxicity_list=8000001 900182 900146 21162 21197 21521,
 |             rescale=1);
 | For PRO Symptoms/QOL items:
 |    %auc_ae (by=arm, datatype=2, timepoint=visit,
 |             toxicity_list=p_q08 p_q09 p_q10 p_q11 p_q01 p_q06);
 *------------------------------------------------------------------*
 */


%macro auc_ae (datasource=2, studynum=, datatype=1, by=, timepoint=, toxicity_list=, incmiss=y, rescale=0);

**********************************;
**** Variables Initializations ***;
**********************************;
%let studynum = %upcase(&studynum.);

%local myErrMsg dtype dname scrgrd aevar dyaxis rtitle char_tmpt;

%if &datatype.=1 %then %do;
   %let dtype=ae;
   %let dname=cytox;
   %let scrgrd=Grade;
   %let aevar=toxicity;
   %let dyaxis=%str((0 to 5 by 1));
%end;

%if &datatype.=2 %then %do;
   %let dtype=qol;
   %let dname=qol;
   %let scrgrd=Scores;
   %let aevar=qol;
   %let dyaxis=%str((0 to 100 by 10));
%end;

**** Title prefix - Unadjusted when rescale is 0, Prorated when rescale is positive;
**** Floating point evaluation is used so that decimal values such as .5 are compared numerically;
**** A blank rescale is skipped here and reported by the parameter validation further down;
%if %length(&rescale.) > 0 %then %do;
   %if %sysevalf(&rescale. = 0) %then %do;
      %let rtitle=Unadjusted;
   %end;

   %else %if %sysevalf(&rescale. > 0) %then %do;
      %let rtitle=Prorated;
   %end;
%end;

**================**;
**  Data Format   **;
**================**;

proc format;
   value yyesnof 1='Yes'
                 2='No';

   value clinsigf 1='<=50'
                  2='>50';
run;


**--------------------------------------------------------------------------**;
******************************************************************************;
*****                 PROMPT ERROR MESSAGE WHEN NECESSARY                *****;
******************************************************************************;
**--------------------------------------------------------------------------**;

***************************************************************;
**** Display error when data source is not found or invalid ***;
***************************************************************;

%if &datasource. = %then %do;
   %let myErrMsg = "Data source is not found.";
   %goto exit;
%end;

%else %if &datasource. ^=1 and &datasource. ^=2 %then %do;
   %let myErrMsg = "Data source is invalid.";
   %goto exit;
%end;

********************************************************************;
**** Display error when data source is 1 and studynum is missing ***;
********************************************************************;

%if &datasource. = 1 and &studynum. = %then %do;
   %let myErrMsg = "Data Source of Cancer Center database was indicated, yet, study number is not provided.";
   %goto exit;
%end;

***********************************************************;
**** Display error when data type is missing or invalid ***;
***********************************************************;

%if &datatype. = %then %do;
   %let myErrMsg = "Datatype is not found.";
   %goto exit;
%end;

%else %if &datatype. ^=1 and &datatype. ^=2 %then %do;
   %let myErrMsg = "Datatype is invalid.";
   %goto exit;
%end;

*****************************************;
**** Display error when by is missing ***;
*****************************************;

%if &by. = %then %do;
   %let myErrMsg = "By group is not found.";
   %goto exit;
%end;

************************************************;
**** Display error when timepoint is missing ***;
************************************************;

%if &timepoint. = %then %do;
   %let myErrMsg = "Timepoint is not found.";
   %goto exit;
%end;

*********************************************************;
**** Display error when toxicity_list is missing     ****;
*********************************************************;
%if &toxicity_list. = %then %do;
   %let myErrMsg = "No variable found in parameter TOXICITY_LIST.";
   %goto exit;
%end;

%let cntvar=1;

**** Get the number of toxicities specify by user;
%do %while (%scan(&toxicity_list., &cntvar.) ^= );
   %let cntvar   = %eval(&cntvar. + 1);
%end;

%let cntvar = %eval(&cntvar. - 1);

**********************************************************;
**** Display error when rescale is missing or invalid  ***;
**********************************************************;

%if &rescale. = %then %do;
   %let myErrMsg = "rescale is not found.";
   %goto exit;
%end;

%else %if %sysevalf(&rescale. < 0) %then %do;
   %let myErrMsg = "rescale is invalid.";
   %goto exit;
%end;


**=======================**;
**  Required Datasets    **;
**=======================**;

**** if using cancer center database;
%if &datasource. = 1 %then %do;
   %crtlibn (d=%upcase(&studynum.));

   proc copy in=pdata out=work;
   run;

%end;

*******************************************************;
**** Display error when CRSE dataset does not exist ***;
*******************************************************;

%if %sysfunc(exist(crse)) = 0 %then %do;
   %let myErrMsg = "crse dataset does not exist.";
   %goto exit;
%end;

************************************************************;
**** Display error when CYTOX/QOL dataset does not exist ***;
************************************************************;

%if %sysfunc(exist(&dname)) = 0 %then %do;
   %let myErrMsg = "&dname. dataset does not exist.";
   %goto exit;
%end;


************************************************************;
**** Display error when PROTDATA dataset does not exist  ***;
**** data type is CTCAE                                  ***;
************************************************************;

%if %upcase (&dname.)=CYTOX %then %do;
   %if %sysfunc(exist(protdata)) = 0 %then %do;
      %let myErrMsg = "protdata dataset does not exist.";
      %goto exit;
   %end;
%end;

**==========================**;
**  Other Required Datasets **;
**==========================**;

**** CRSE dataset;
proc sql;
   create table crse0 as
   select dcntr_id, &by., evalae
   from work.crse
   order by dcntr_id;
quit;

**** Determine the variable type, format and label for CRSE dataset;
proc sql;
  create table _w1 as
  select *
  from sashelp.vcolumn
  where upcase(libname)="WORK"
  and upcase(memname)="CRSE0";
quit;

**** Retrieve the type, format and label for By variable;
data _null_;
   set _w1 end=eof;

   if (label=' ') then label=name;

   if (upcase(name)=%upcase("&by")) then do;
      if (format=' ') and (type='char') then format=compress('$char' || trim(left(length)) || '.');
      if (format=' ') and (type='num') then format='8.';

      call symput('byf', trim(left(format)));
      call symput('bylabl', trim(left(label)));
      call symput('bytype', trim(left(type)));
   end;
run;

**** Determine the number of level used in the BY parameter;
proc sql;
   create table _wchkbylvl as
   select distinct &by.
   from crse0;
quit;

data _null_;
   set _wchkbylvl end=eof;
   call symput ('bylevel' || trim (left(_n_)), &by.);
   call symput ('bylvllbl' || trim (left(_n_)), put (&by., &byf.));

   if eof then call symput ('numlevel', trim(left(_n_)));
run;

*********************************************************************************;
**** Verify the toxicity list provided by users to make sure they are unique ****;
*********************************************************************************;

%if &datatype.=1 %then %do;
   proc sort data=work.cytox;
      by dcntr_id;
   run;

   proc sort data=work.protdata;
      by dcntr_id;
   run;

   **** Make sure the cytox dataset provided are for the intended population;
   data _wcytox_tmp;
      merge crse0 (in=in1 keep=dcntr_id)
            cytox (in=in2 keep=dcntr_id &timepoint. toxicity grade);
      by dcntr_id;
      if in1 and in2;
   run;

   **** Make sure the protdata dataset provided are for the intended population;
   data _wprotdata_tmp;
      merge crse0 (in=in1 keep=dcntr_id)
            protdata (in=in2 keep=dcntr_id &timepoint.);
      by dcntr_id;
      if in1 and in2;
   run;

   **** All cycles needed to exist in order to run AUC macro, data can be missing at that cycle;
   proc sql;
      create table _wchktimelvl0 as
      select distinct &timepoint.
      from _wcytox_tmp
      order by &timepoint.;
   quit;

   **** Determine the unique treatment cycles and if patient is evaluable for AE;
   **** Every evaluable patient (evalae=1) is paired with every timepoint found in _wchktimelvl0;
   **** The always-true join condition keeps the pairing independent of the type or value of dcntr_id;
   proc sql;
      create table _wpttime as
      select l.*, r.&timepoint.
      from crse0 l left join _wchktimelvl0 r
      on 1=1
      where l.evalae=1
      order by l.dcntr_id, r.&timepoint.;
   quit;

   proc sql;
      create table _wpttime_tmp as
      select distinct dcntr_id, max (&timepoint.) as mxcyc_prot
      from _wprotdata_tmp
      group by dcntr_id
      order by dcntr_id;
   quit;

   proc sql;
      create table protdata0 as
      select l.*, r.mxcyc_prot
      from _wpttime l left join _wpttime_tmp r
      on l.dcntr_id = r.dcntr_id
      having l.&timepoint. <= r.mxcyc_prot
      order by l.dcntr_id, l.&timepoint.;
   quit;

   proc sort data=_wcytox_tmp;
      by dcntr_id &timepoint.;
   run;

   **** Assume Grade 0 when certain toxicity is not reported, but, AE entries is available at that cycle;
   **** One row per patient and timepoint present in _wcytox_tmp, carrying assume_grd0=1 and grade=0;
   **** Rows are kept only when the timepoint does not exceed mxcyc_prot from protdata0;
   proc sql;
      create table checkAEgrade as
      select distinct l.dcntr_id, l.&timepoint., 1 as assume_grd0, 0 as grade, r.&by., r.mxcyc_prot
      from _wcytox_tmp l left join protdata0 r
      on l.dcntr_id = r.dcntr_id and l.&timepoint. = r.&timepoint.
      having l.&timepoint. <= r.mxcyc_prot
      order by l.dcntr_id, l.&timepoint.;
   quit;

   *** Include only patients who indicated as evaluable for AE and with treatment cycles available;
   data _wcytox0;
      merge protdata0    (in=in1)
            _wcytox_tmp  (in=in2);
      by dcntr_id &timepoint.;
      if in1;
      evalpt=1;
   run;

   **** CYTOX dataset - Overall AE analysis;
   **** Grade 0 will be pre-filled at each cycle if at least one AE was reported at that cycle;
   **** and was indicated by user to do so;
   data cytox0;
      set _wcytox0

      %if %upcase (&incmiss.)=Y %then %do;
         checkAEgrade (drop=assume_grd0 mxcyc_prot)
      %end;
      ;
   run;

%end;

%if &datatype.=2 %then %do;
   **** Distinct list of QOL item names and labels found in the QOL dataset;
   **** The CREATE TABLE statement is terminated explicitly so that QUIT ends the procedure;
   proc sql;
      create table qol_items as
      select distinct qol, _label_
      from work.qol;
   quit;

   data _null_;
      set qol_items end=eof;

      call symput ('qlist_'  || trim (left(_n_)), qol);
      call symput ('qlabel_' || trim (left(_n_)), _label_);

      if eof then call symput ('n_allqol', trim (left(_n_)));
   run;

   **** Create the format for the QOL items used;
   proc format;
      value $ qolf
         %do i=1 %to &n_allqol.;
            "&&&qlist_&i." = "&&&qlabel_&i."
         %end;
      ;
   run;

   **** If there is no entries of QOL whatsoever, that QOL will be set to missing;
   **** QOL dataset - Base dataset;
   proc sql;
      create table qol0 as
      select dcntr_id, &timepoint., qol format=$qolf., _label_, scores
      from work.qol
      order by dcntr_id;
   quit;
%end;

proc sql;
   create table _c2 as
   select *
   from sashelp.vcolumn
   where libname="WORK"
   and memname=%upcase("&dname.0");
quit;

**** Retrieve the format and label for Toxicity/QOL AE and timepoints;
**** Macro variables set here - txf and txlbl for the AE/QOL variable, tmptf and tmptlbl for the timepoint;
**** char_tmpt is set to y when the timepoint variable is character;
data _null_;
   set _c2;
   if (label=' ') then label=name;
   if (upcase(name)=%upcase("&aevar.")) then do;
      **** Fall back to a default format when none is attached so that later PUT calls using txf stay valid;
      if (format=' ') and (type='char') then format=compress('$char' || trim(left(length)) || '.');
      if (format=' ') and (type='num') then format='best12.';

      call symput('txf',trim(left(format)));
      call symput('txlbl',trim(left(label)));
   end;

   if (upcase(name)=%upcase ("&timepoint.")) then do;
      if type='char' then do;
         call symput ('char_tmpt', 'y');
      end;

      if type='num'  and format=' ' then format=compress(trim(left(length)) || '.');

      call symput('tmptf',trim(left(format)));
      call symput('tmptlbl',trim(left(label)));
   end;
run;

************************************************************;
**** Display error when Timepoint is Characters field    ***;
************************************************************;

%if %upcase (&char_tmpt.)=Y %then %do;
   %let myErrMsg = "TIMEPOINT required a numeric field";
   %goto exit;
%end;

**** Declare each toxicity as macro variable;
data _wtxlist0;
   %do i=1 %to &cntvar.;
      %if &datatype.=1 %then %do;
         toxvar&i.=%scan (&toxicity_list., &i.);
         toxlbl&i.=put (toxvar&i., &txf.);
      %end;

      %if &datatype.=2 %then %do;
         length toxvar&i toxlbl&i. $ 255.;
         format toxlbl&i. $qolf.;

         toxvar&i.="%scan (&toxicity_list., &i.)";
         toxlbl&i.= toxvar&i.;
      %end;
   %end;
run;

**** Remove any duplicates toxicities codes;
proc transpose data=_wtxlist0 out=_wf_txlist0;
   var %do i=1 %to &cntvar.;
       toxvar&i.
      %end;
      ;
run;

proc sort data=_wf_txlist0 nodupkeys;
   by col1;
run;

**** To retain the order of AE as it was specified by user;
proc sort data=_wf_txlist0;
   by _name_;
run;


**** Change the obs number if we want to accept more toxicity variables for analysis;
data _null_;
   set _wf_txlist0 (firstobs=1 obs=8) end=eof;
   call symput ('txvar' || trim(left(_n_)),  trim(left(col1)));
   call symput ('utxlbl' || trim (left(_n_)), put (col1, &txf.));
   if eof then call symput ('numvar', trim(left( _n_)));
run;

**** Determine the number of timepoints used in the timepoint parameter;
proc sql;
   create table _wchktimelvl as
   select distinct &timepoint.
   from &dname.0
   order by &timepoint.;
quit;

data _null_;
   length alltmpt1 alltmpt2 $255.;
   set _wchktimelvl end=eof;
   retain alltmpt1 alltmpt2;

   alltmpt1 = compbl (alltmpt1 || &timepoint.);
   alltmpt2 = compbl (alltmpt2 || compress ('_' || &timepoint.));

   if _n_=1 then call symput ('tmpt1level', &timepoint.);

   call symput ('tmptlevel' || trim (left(_n_)), &timepoint.);
   call symput ('tmptlvllbl' || trim (left(_n_)), put (&timepoint., &tmptf.));

   if eof then do;
      call symput ('numtmptlevel', trim(left(_n_)));
      call symput ('alltmptlevel1', alltmpt1);
      call symput ('alltmptlevel2', alltmpt2);
   end;

run;

**** All timepoints are needed in order to run AUC macro;
**** Every patient in crse0 is paired with every timepoint in _wchktimelvl;
**** The always-true join condition keeps the pairing independent of the type or value of dcntr_id;
proc sql;
   create table crse1 as
   select l.*, r.&timepoint.
   from crse0 l left join _wchktimelvl  r
   on 1=1;
quit;

**=============================================================**;
**                   DATA MANIPULATIONS                        **;
** *********This applicable for CTCAE datasets ONLY *************;
** Maximum grade across ALL Adverse Events at each timepoints  **;
**=============================================================**;

%if &datatype.=1 %then %do;

   proc sort data=cytox0 out=_wov_mxtox;
      by dcntr_id &timepoint. grade;
   run;

   **** mytimepoint store the unformatted values of timepoint;
   data _wmxtox;
      set _wov_mxtox;
      by dcntr_id &timepoint. grade;
      if last.&timepoint.;

      mytimepoint=&timepoint.;
   run;

   proc sort data=_wmxtox out=_ws_mxtox;
      by dcntr_id &by.;
   run;

   proc transpose data=_ws_mxtox out=_wf_mxtox;
      by dcntr_id &by.;
      *id &timepoint.;
      id mytimepoint;
      var grade;
   run;

   **** Calculate the AUC values based only on all known values (nobase=3);
   %auc(data=_wf_mxtox,
        yvars=&alltmptlevel2.,
        rescale=&rescale.,
        nobase=3,
        fixed=1);

   proc means data=_wf_mxtox;
      format &alltmptlevel2. 8.2;
      var &alltmptlevel2.;
      class &by.;
      output out=_woutmn;
   run;

   data _waucgraph;
      set _woutmn;
      drop _type_ _freq_ _stat_;
      where upcase (_stat_)='MEAN' and
         %if &bytype.=char %then %do;
            &by. ^=' '
         %end;
         %else %if &bytype.=num %then %do;
            &by. ^=.
         %end;
      ;
   run;

   proc sort data=_waucgraph;
      by &by.;
   run;

   proc transpose data=_waucgraph out=_wf_aucgraph;
      by &by.;
   run;

   **** N at each cycles;
   proc means data=_wf_mxtox;
      format &alltmptlevel2. 8.2;
      var &alltmptlevel2.;
      class &by.;
      output out=_woutobs;
   run;

   data _wobs;
      set _woutobs;
      drop _type_ _freq_ _stat_;
      where upcase (_stat_)='N' and
         %if &bytype.=char %then %do;
            &by. ^=' '
         %end;
         %else %if &bytype.=num %then %do;
            &by. ^=.
         %end;
      ;
   run;

   proc sort data=_wobs;
      by &by.;
   run;

   proc transpose data=_wobs out=_wf_obs (rename=(col1=nobs));
      by &by.;
   run;

   **** Data set with N, mean of max grade per cycle for each group;
   **** To produce AUC graph;
   proc sql;
      create table f_aucgraph0 as
      select l.*, r.nobs format=8., input (substr (l._name_, 2), 8.) as &timepoint.
      from  _wf_aucgraph l left join _wf_obs r
      on l.&by. = r.&by. and
         l._name_= r._name_
         order by &by., &timepoint;
   quit;

   **** Getting the Means of AUC for each group;
   proc means data=_wf_mxtox;
      var area;
      class &by.;
      output out=_woutauc (where=(_type_=1)) mean=mean;
   run;

   data _wgetaucval;
      length aucval $250.;
      set _woutauc end=eof;
      keep aucval;

      aucval = compbl ("&bylabl. " || put (&by., &byf.) ||
              " =" || trim (left(put (mean, 8.1))));
   run;

   data _null_;
      set  _wgetaucval end=eof;

      call symput ('aucval_' || trim (left(_n_)), aucval);

      if eof then call symput ('totby', trim(left(_n_)));
   run;

   **** Dataset to run table macro summary;
   data f_mxtoxb;
      set _wf_mxtox;
      format anyae g3plusae yyesnof.;

      mxgrd=max (of &alltmptlevel2.);

      if mxgrd = . then anyae=.;
      else if mxgrd >= 1 then anyae=1;
      else anyae=2;

      if mxgrd = . then g3plusae=.;
      else if mxgrd >= 3 then g3plusae=1;
      else g3plusae=2;

      label area        = "&rtitle. AUC: Adverse Events"
            mxgrd       = 'Maximum Adverse Events'
            anyae       = 'Any Adverse Events'
            g3plusae    = 'Any Grade 3+ Adverse Events';
   run;
%end;

**=============================================================**;
**                   DATA MANIPULATIONS                        **;
**                (For Both CTCAE and QOLAE)                   **;
** Grade per adverse events or Scores per QOL AE at each timepoints **;
** If multiple grade was reported per AE at specific timepoints **;
** proc transpose will fail and users need to fix their data before re-run the program;
**=============================================================**;

%do y=1 %to &numvar.;

   **** To make sure all patients have an observation for each timepoint;
   **** and also an observation for each toxicity within each timepoint;
   %if &datatype.=1 %then %do;

      proc sql;
         create table _wcytox1 as
         select distinct l.dcntr_id, l.&timepoint., l.&by., l.evalae, l.evalpt, l.mxcyc_prot, r.assume_grd0
         from cytox0 (where=(grade^=0 and toxicity^=. and evalae^=.)) l left join checkAEgrade r
         on l.dcntr_id = r.dcntr_id;
      quit;

      proc sql;
         create table _ws_mxtox_&y. as
         select l.*, r.toxicity, r.grade
         from _wcytox1 l left join cytox0 (where=(toxicity=&&&txvar&y.)) r
         on l.dcntr_id    = r.dcntr_id and
            l.&timepoint. = r.&timepoint.;
      quit;

      **** If AE is not reported at the time point, assumed patient do not have that AE, will code as 0;
      data _wmxtox_&y.;
         set _ws_mxtox_&y.;

         %if %upcase (&incmiss.)=Y %then %do;
            if grade=. and assume_grd0=1 then grade=0;
         %end;

         mytimepoint=&timepoint.;

      run;

   %end;

   %if &datatype.=2 %then %do;

      **** To make sure all patients have an observation for each timepoint;
      proc sql;
         create table _wmxtox_&y.  as
         select l.*, r.qol, r.scores
         from crse1 l left join qol0 (where=(upcase(qol)=%upcase("&&&txvar&y."))) r
         on l.dcntr_id = r.dcntr_id and
            l.&timepoint. = r.&timepoint. ;
      quit;

      **** Need this extra step in order to create the mytimepoint variable without carried over the data format;
      data _wmxtox_&y.;
         set _wmxtox_&y.;
         mytimepoint=&timepoint.;
      run;
   %end;

   proc sort data=_wmxtox_&y. out=_wss_mxtox_&y.;
      by dcntr_id &by.;
   run;

   proc transpose data=_wss_mxtox_&y. out=_wf_mxtox_&y.;
      by dcntr_id &by.;
      *id &timepoint.;
      id mytimepoint;
      var &scrgrd.;
   run;

   **** Calculate the AUC values;
   **** nobase=3 refer to use the AUC based only on all known values;
   %auc(data=_wf_mxtox_&y.,
        yvars=&alltmptlevel2.,
        rescale=&rescale.,
        nobase=3,
        fixed=1);

   proc means data=_wf_mxtox_&y.;
      format &alltmptlevel2. 8.2;
      var &alltmptlevel2.;
      class &by.;
      output out=_woutmn_&y.;
   run;

   **** mean at each cycles;
   data _waucgraph_&y.;
      set _woutmn_&y.;
      drop _type_ _freq_ _stat_;
      where upcase (_stat_)='MEAN' and
         %if &bytype.=char %then %do;
            &by. ^=' '
         %end;
         %else %if &bytype.=num %then %do;
            &by. ^=.
         %end;
      ;
   run;

   proc sort data=_waucgraph_&y.;
      by &by.;
   run;

   proc transpose data=_waucgraph_&y. out=_wf_aucgraph_&y.;
      by &by.;
   run;

   **** N at each cycles;
   proc means data=_wf_mxtox_&y.;
      format &alltmptlevel2. 8.2;
      var &alltmptlevel2.;
      class &by.;
      output out=_woutobs_&y.;
   run;

   data _wobs_&y.;
      set _woutobs_&y.;
      drop _type_ _freq_ _stat_;
      where upcase (_stat_)='N' and
         %if &bytype.=char %then %do;
            &by. ^=' '
         %end;
         %else %if &bytype.=num %then %do;
            &by. ^=.
         %end;
      ;
   run;

   proc sort data=_wobs_&y.;
      by &by.;
   run;

   proc transpose data=_wobs_&y. out=_wf_obs_&y. (rename=(col1=nobs));
      by &by.;
   run;

   **** Data set with mean of max grade per cycle for each group;
   **** To produce AUC graph;
   proc sql;
      create table f_aucgraph0_&y. as
      select l.*, r.nobs format=8., input (substr (l._name_, 2), 8.) as &timepoint.
      from _wf_aucgraph_&y. l left join _wf_obs_&y. r
      on l.&by. = r.&by. and
         l._name_= r._name_
      order by &by., &timepoint;
   quit;

   **** Getting the Means of AUC for each group;
   proc means data=_wf_mxtox_&y.;
      var area;
      class &by.;
      output out=_woutauc_&y. (where=(_type_=1)) mean=mean;
   run;

   data _wgetaucval_&y.;
      length aucval $250.;
      set _woutauc_&y. end=eof;
      keep aucval;

      aucval = compbl ("&bylabl. " || put (&by., &byf.) || "=" || trim (left(put (mean, 8.1))));

   run;

   data _null_;
      set  _wgetaucval_&y. end=eof;

      call symput ("aucval_&y._" || trim (left(_n_)), aucval);

      if eof then call symput ("totby_&y.", trim(left(_n_)));
   run;

   %if &datatype.=1 %then %do;
      /* Data set for the table macro summary. Adds the maximum grade across
         all time point variables and two indicators derived from it. Both
         indicators are coded 1 when the threshold is met, 2 when it is not,
         and missing when the maximum grade is missing. */
      data f_mxtoxb_&y.;
         set _wf_mxtox_&y.;
         format anyae g3plusae yyesnof.;

         mxgrd = max (of &alltmptlevel2.);

         if mxgrd = . then do;
            anyae    = .;
            g3plusae = .;
         end;
         else do;
            /* Any event: maximum grade of 1 or higher */
            if mxgrd >= 1 then anyae = 1;
            else anyae = 2;

            /* Any grade 3+ event: maximum grade of 3 or higher */
            if mxgrd >= 3 then g3plusae = 1;
            else g3plusae = 2;
         end;

         label
            area     = "&rtitle. AUC: &&&utxlbl&y."
            mxgrd    = "Maximum Grade of &&&utxlbl&y."
            anyae    = "Any &&&utxlbl&y."
            g3plusae = "Any Grade 3+ &&&utxlbl&y."
         ;
      run;
   %end;

   %if &datatype.=2 %then %do;

      /* If baseline is missing, calculate the AUC values based only on all
         known values (nobase=3).
         Data set for the table macro summary. Adds the maximum score across
         all time point variables and an indicator coded 1 when that maximum
         is 50 or lower, 2 when it is higher, and missing when it is missing. */
      data f_mxtoxb_&y.;
         set _wf_mxtox_&y.;
         format le50ae clinsigf.;

         mxgrd = max (of &alltmptlevel2.);

         if mxgrd = . then le50ae = .;
         else le50ae = ifn (mxgrd <= 50, 1, 2);

         label
            area   = "&rtitle. AUC: &&&utxlbl&y."
            mxgrd  = "Maximum Scores of &&&utxlbl&y."
            le50ae = "Maximum Scores of &&&utxlbl&y."
         ;
      run;

      /* !!! If baseline is missing, set AUC values to missing (nobase=2) !!!
         This is only applicable to QOL data, because a missing QOL value is
         truly missing. For AE data, a grade of 0 is assumed when an AE is not
         reported.

         Calls the external auc macro on the working data set with the time
         point variables as the y values and asks for the result under the
         name area2. The later steps read area2 from this same data set, so
         the macro presumably adds that variable in place.
         NOTE(review): confirm against the auc macro. */

      %auc(data=_wf_mxtox_&y.,
           yvars=&alltmptlevel2.,
           name=area2,
           rescale=&rescale.,
           nobase=2,
           fixed=1);

      /* Summarize every time point variable by group. No statistic keywords
         are given on OUTPUT, so the output data set holds one row per
         statistic, labelled by _stat_.
         NOTE(review): this reads the same data set and variables as the
         earlier N step. Whether the auc call above changes those columns is
         not visible here. */
      proc means data=_wf_mxtox_&y.;
         class &by.;
         var &alltmptlevel2.;
         format &alltmptlevel2. 8.2;
         output out=_w2outmn_&y.;
      run;

      /* Keep the MEAN rows only and discard rows where the grouping variable
         is missing. The missing-value test depends on whether the grouping
         variable is character or numeric. */
      data _w2aucgraph_&y. (drop=_type_ _freq_ _stat_);
         set _w2outmn_&y.;
         where upcase (_stat_)='MEAN' and
            %if &bytype.=char %then %do;
               &by. ^=' '
            %end;
            %else %if &bytype.=num %then %do;
               &by. ^=.
            %end;
         ;
      run;

      proc sort
            data = _w2aucgraph_&y.;
         by &by.;
      run;

      /* One row per group and time point variable, with the mean left in col1. */
      proc transpose
            data = _w2aucgraph_&y.
            out  = _w2f_aucgraph_&y.;
         by &by.;
      run;

      /* N at each cycle.
         Same default summary as for the means. The N rows are picked out
         below to give the count of non-missing values per time point. */
      proc means data=_wf_mxtox_&y.;
         class &by.;
         var &alltmptlevel2.;
         format &alltmptlevel2. 8.2;
         output out=_w2outobs_&y.;
      run;

      /* Keep the N rows only and discard rows where the grouping variable is
         missing. */
      data _w2obs_&y. (drop=_type_ _freq_ _stat_);
         set _w2outobs_&y.;
         where upcase (_stat_)='N' and
            %if &bytype.=char %then %do;
               &by. ^=' '
            %end;
            %else %if &bytype.=num %then %do;
               &by. ^=.
            %end;
         ;
      run;

      proc sort
            data = _w2obs_&y.;
         by &by.;
      run;

      /* One row per group and time point variable, with the count stored in nobs. */
      proc transpose
            data = _w2obs_&y.
            out  = _w2f_obs_&y. (rename=(col1=nobs));
         by &by.;
      run;

      /* Data set with N and the mean of max grade per cycle for each group,
         used to produce the AUC graph. Each mean row (col1) is matched to its
         count (nobs) on group and source variable name. The numeric time
         point is parsed from the variable name after skipping its first
         character. */
      proc sql;
         create table f_auc2graph0_&y. as
            select g.*,
                   c.nobs format=8.,
                   input (substr (g._name_, 2), 8.) as &timepoint.
            from _w2f_aucgraph_&y. g
                 left join
                 _w2f_obs_&y. c
              on  g.&by.   = c.&by.
              and g._name_ = c._name_
            order by &by., &timepoint;
      quit;

      /* Mean AUC (variable area2) for each group. Only the _type_=1 rows are
         kept in the output data set, and the statistic is stored in the
         variable mean. */
      proc means data=_wf_mxtox_&y.;
         class &by.;
         var area2;
         output out  = _w2outauc_&y. (where=(_type_=1))
                mean = mean;
      run;

      /* Build the footnote text for each group: the group label, the formatted
         group value, an equals sign and the mean AUC to one decimal place,
         with repeated blanks collapsed.

         The LENGTH statement now names auc2val, the variable that is actually
         assigned and kept. It previously declared a different, never-assigned
         variable, which left auc2val without its intended length and created
         a stray uninitialized variable. */
      data _w2getaucval_&y.;
         length auc2val $250.;
         set _w2outauc_&y.;
         keep auc2val;

         auc2val = compbl ("&bylabl. " || put (&by., &byf.) || "=" || trim (left(put (mean, 8.1))));
      run;

      /* Publish the per-group AUC footnote text as macro variables named
         auc2val_<y>_<i>, and the number of groups as tot2by_<y>.

         tot2by_<y> is preset to 0 so that it always exists, even when the
         input data set has no rows. Otherwise the footnote loop that uses it
         as its upper bound could not resolve the reference. */
      %let tot2by_&y. = 0;

      data _null_;
         set _w2getaucval_&y. end=eof;

         /* CATS converts the row counter without the implicit
            numeric-to-character conversion note that LEFT on a numeric value
            produces. The text itself is stored with CALL SYMPUT as before. */
         call symput (cats ("auc2val_&y._", _n_), auc2val);

         /* CALL SYMPUTX formats the numeric counter and strips blanks itself. */
         if eof then call symputx ("tot2by_&y.", _n_);
      run;

      /* Data set for the table macro summary for nobase=2. Adds the maximum
         score across all time point variables and an indicator coded 1 when
         that maximum is 50 or lower, 2 when it is higher, and missing when it
         is missing. */
      data f2_mxtoxb_&y.;
         set _wf_mxtox_&y.;
         format le50ae clinsigf.;

         mxgrd = max (of &alltmptlevel2.);

         select;
            when (mxgrd = .)   le50ae = .;
            when (mxgrd <= 50) le50ae = 1;
            otherwise          le50ae = 2;
         end;

         label
            area2  = "&rtitle. AUC: &&&utxlbl&y."
            mxgrd  = "Maximum Scores of &&&utxlbl&y."
            le50ae = "Maximum Scores of &&&utxlbl&y."
         ;
      run;

   %end;
%end;


**=============================================================**;
**=============================================================**;
**=============================================================**;
**                  CREATE AUC GRAPH                           **;
**=============================================================**;
**=============================================================**;
**=============================================================**;

/* Open the figures document. The date is suppressed in the output, graphs
   are rendered as PNG, and the RTF file is written with the journal style.
   The file name carries no path and is built from the data type tag and the
   AUC title prefix. */
options nodate;
ods graphics / outputfmt=png;
ods rtf file="auc_ae_figures_&dtype._&rtitle.AUC.doc" style=journal;

/* Figure 1: one plot per analysis variable, drawn from the data set whose AUC
   uses all known values when baseline is missing. */
%do y=1 %to &numvar.;
   title1 "Figure 1_&y.: %trim(&&&utxlbl&y.) %trim (&scrgrd.) Over Time By %trim(&bylabl.)";
   title2 "Notes: If baseline is missing, the AUC is calculated using known data";

   /* Footnote 1 is the heading. Footnotes 2 onward list the mean AUC text of
      each group. */
   footnote1 h=9pt j=left "&rtitle. AUC:";
   %do z=1 %to &&&totby_&y.;
      footnote%eval(&z.+1) h=9pt j=left "%trim(&&&aucval_&y._&z.)";
   %end;

   /* Mean per time point as a line with a shaded band down to zero for each
      group, with the per-time-point counts shown in an axis table outside
      the plot area. */
   proc sgplot data=f_aucgraph0_&y.;
      format &timepoint. &tmptf.
             &by. &byf.;
      series x=&timepoint. y=col1 /
             group=&by. markers name="pline";
      band x=&timepoint. upper=col1 lower=0 /
           transparency=0.6 group=&by. name="pband";
      xaxis values=(&alltmptlevel1.) minor label="&tmptlbl.";
      yaxis values=&dyaxis. label="Mean &scrgrd.";
      keylegend "pline" "pband" /
                position=bottom title="Lines";
      xaxistable nobs /
                 x=&timepoint. class=&by. location=outside;
   run;

%end;

/* Figure 2 for datatype 1: a single plot of the maximum adverse event grade,
   read from the data set f_aucgraph0 and the macro variables totby and
   aucval_<i>, none of which are created in this section. */
%if &datatype.=1 %then %do;
   title1 "Figure 2: Maximum Adverse Events Grade Over Time By %trim(&bylabl.)";
   title2 "Notes: If baseline is missing, the AUC is calculated using known data";

   /* Footnote 1 is the heading. Footnotes 2 onward list the mean AUC text of
      each group. */
   footnote1 h=9pt j=left "&rtitle. AUC:";
   %do z=1 %to &totby.;
      footnote%eval(&z.+1) h=9pt j=left "%trim(&&aucval_&z.)";
   %end;

   /* Same layout as the per-variable figures, but with a fixed grade axis
      from 0 to 5. */
   proc sgplot data=f_aucgraph0;
      format &timepoint. &tmptf.
             &by. &byf.;
      series x=&timepoint. y=col1 /
             group=&by. markers name="pline";
      band x=&timepoint. upper=col1 lower=0 /
           transparency=0.6 group=&by. name="pband";
      xaxis values=(&alltmptlevel1.) minor label="&tmptlbl.";
      yaxis values=(0 to 5 by 1) label='Mean Grade';
      keylegend "pline" "pband" /
                position=bottom title="Lines";
      xaxistable nobs /
                 x=&timepoint. class=&by. location=outside;
   run;
%end;


/* Figure 2 for datatype 2: one plot per analysis variable, drawn from the
   nobase=2 data sets and footnoted with the nobase=2 mean AUC text. */
%if &datatype.=2 %then %do;
   %do y=1 %to &numvar.;
      title1 "Figure 2_&y.: %trim(&&&utxlbl&y.) %trim (&scrgrd.) Over Time By %trim(&bylabl.)";
      title2 "Notes: If a patient has missing baseline, that patient is not used in calculating the AUC";

      /* Footnote 1 is the heading. Footnotes 2 onward list the mean AUC text
         of each group. */
      footnote1 h=9pt j=left "&rtitle. AUC:";
      %do z=1 %to &&&tot2by_&y.;
         footnote%eval(&z.+1) h=9pt j=left "%trim(&&&auc2val_&y._&z.)";
      %end;

      proc sgplot data=f_auc2graph0_&y.;
         format &timepoint. &tmptf.
                &by. &byf.;
         series x=&timepoint. y=col1 /
                group=&by. markers name="pline";
         band x=&timepoint. upper=col1 lower=0 /
              transparency=0.6 group=&by. name="pband";
         xaxis values=(&alltmptlevel1.) minor label="&tmptlbl.";
         yaxis values=&dyaxis. label="Mean &scrgrd.";
         keylegend "pline" "pband" /
                   position=bottom title="Lines";
         xaxistable nobs /
                    x=&timepoint. class=&by. location=outside;
      run;

   %end;
%end;

/* Close the figures document and turn ODS graphics off again. */
ods rtf close;

ods graphics off;

/*** Note: the two-sample tests (ptype=14) can only be used when there are exactly two arms ***/

**=============================================================**;
**=============================================================**;
**=============================================================**;
**                 GENERATE SUMMARY TABLES                     **;
**=============================================================**;
**=============================================================**;
**=============================================================**;

/* Clear the AUC footnotes set for the figures before the tables are produced. */
footnote;

/* Summary tables for datatype 1.
   Table 1_<y>: one table per analysis variable, summarizing the AUC, the
   maximum grade (listed twice, once with type 1 and once with type 2) and the
   two indicators anyae and g3plusae.
   Table 2: the same summary from the data set f_mxtoxb, which is not created
   in this section.
   The ptype argument is passed only when numlevel equals 2. Otherwise the
   table macro is left to use its own default.
   Finally all table data sets are merged into one document. */
%if &datatype.=1 %then %do;
   %do y=1 %to &numvar.;
      %table(dsn=f_mxtoxb_&y., by=&by.,
             var=area mxgrd mxgrd anyae g3plusae,
             type=1 1 2 2 2,
             %if %eval (&numlevel.=2) %then %do;
                ptype=14 14 1 1 1,
             %end;
             outdat=tb1_&y.,
             ttitle1=Table 1_&y.: %trim(&&&utxlbl&y.) Summary by &bylabl.,
             ttitle2=%str(Notes: If baseline is missing, the AUC is calculated using known data),
             pfoot=y,
             titlesz=8,bodysz=7,footsz=7);
   %end;

   %table(dsn=f_mxtoxb, by=&by.,
          var=area mxgrd mxgrd anyae g3plusae,
          type=1 1 2 2 2,
          %if %eval (&numlevel.=2) %then %do;
             ptype=14 14 1 1 1,
          %end;
          outdat=tb2,
          ttitle1=Table 2: Overall Adverse Events Summary by &bylabl.,
          ttitle2=%str(Notes: If baseline is missing, the AUC is calculated using known data),
          pfoot=y,
          titlesz=8,bodysz=7,footsz=7);

   /* The dsn list is generated by the loop: tb1_1 up to tb1_<numvar>, then tb2. */
   %tablemrg (dsn    = %do y=1 %to &numvar.;
                          tb1_&y.
                       %end;
                       tb2,
              outdoc = auc_ae_tbl_&dtype._&rtitle.AUC);
%end;

/* Summary tables for datatype 2.
   Table 1_<y>: per analysis variable, using the AUC variable area (all known
   values used when baseline is missing).
   Table 2_<z>: per analysis variable, using the AUC variable area2 (nobase=2).
   Each table summarizes the AUC, the maximum score and the indicator le50ae.
   The ptype argument is passed only when numlevel equals 2. Otherwise the
   table macro is left to use its own default.
   Finally all table data sets are merged into one document. */
%if &datatype.=2 %then %do;
   %do y=1 %to &numvar.;
      %table(dsn=f_mxtoxb_&y., by=&by.,
             var=area mxgrd le50ae,
             type=1 1 2,
             %if %eval (&numlevel.=2) %then %do;
                ptype=14 14 1,
             %end;
             outdat=tb1_&y.,
             ttitle1=Table 1_&y.: %trim(&&&utxlbl&y.) Summary by &bylabl.,
             ttitle2=%str(Notes: If baseline is missing, the AUC is calculated using known data),
             pfoot=y,
             titlesz=8,bodysz=7,footsz=7);
   %end;

   %do z=1 %to &numvar.;
      %table(dsn=f2_mxtoxb_&z., by=&by.,
             var=area2 mxgrd le50ae,
             type=1 1 2,
             %if %eval (&numlevel.=2) %then %do;
                ptype=14 14 1,
             %end;
             outdat=tb2_&z.,
             ttitle1=Table 2_&z.: %trim(&&&utxlbl&z.) Summary by &bylabl.,
             ttitle2=%str(Notes: If a patient has missing baseline, that patient is not used in calculating the AUC),
             pfoot=y,
             titlesz=8,bodysz=7,footsz=7);
   %end;


   /* The dsn list is generated by the two loops: all tb1_ data sets first,
      then all tb2_ data sets. */
   %tablemrg (dsn    = %do y=1 %to &numvar.;
                          tb1_&y.
                       %end;

                       %do z=1 %to &numvar.;
                          tb2_&z.
                       %end;
                       ,
              outdoc = auc_ae_tbl_&dtype._&rtitle.AUC);
%end;


**--------------------------------------------------------------------------**;
******************************************************************************;
*****                        REMOVE WORKING DATA SET                     *****;
******************************************************************************;
**--------------------------------------------------------------------------**;

/* Delete every WORK data set whose name starts with _w or _tb6_.
   NOTE(review): no data set with the _tb6_ prefix is created in this section.
   It is presumably produced by one of the table macros, to be confirmed.
   This step sits before the exit label, so it is skipped when the macro
   jumps to that label. */
proc datasets library=work nolist;
   delete _w: _tb6_: ;
quit;


**--------------------------------------------------------------------------**;
******************************************************************************;
*****               EXIT THE PROGRAM WHEN AN ERROR OCCURS                *****;
******************************************************************************;
**--------------------------------------------------------------------------**;
%exit:

**--------------------------------------------------------------------------**;
******************************************************************************;
*****                       DISPLAY THE MACRO STATUS                     *****;
******************************************************************************;
**--------------------------------------------------------------------------**;
/* Write the macro status to the log between two separator lines. The ERROR
   line is generated only when the macro variable myErrMsg is not empty.
   NOTE(review): the value is placed on the PUT statement as is, so it
   presumably already contains its own quotation marks. Unquoted text would
   be read as variable names. Confirm where myErrMsg is assigned. */
data _null_;
   put "---------------------------";
   %if &myErrMsg. ^= %then %do;
      put "ERROR: " &myErrMsg.;
   %end;
   put "---------------------------";
run;


%mend auc_ae;
