library(conflicted)
suppressMessages(conflict_prefer("filter", "dplyr"))

library(DOPE)  

library(xml2)  # read_html()
library(rvest)  # html_nodes(), html_text()
library(purrr)  # map_dfr()

suppressPackageStartupMessages(library(dplyr))  # %>%, bind_rows()   
library(stringr)  # str_detect, str_to_lower()
library(tidyr) # pivot_longer()

library(tibble)  # tibble()
library(usethis)  # use_data()

Scrape No Slang Data

Slang terms (synonyms) were also scraped from an additional source: https://www.noslang.com/drugs/dictionary

The methods used to extract these data are very similar to those used to extract the DEA data, but we include them here as well:

# Scrape one page of the NoSlang drug-slang dictionary.
#
# page: the page identifier appended to the dictionary URL (this script passes
#   "#" and each lower-case letter).
#
# Returns a tibble with one row per <abbr> element found inside a table:
#   street_name - the text of the element
#   description - the value of the element's "title" attribute
get_slang <- function(page) {
  url <- paste0("https://www.noslang.com/drugs/dictionary/", page)

  # Download and parse the page once, and take both columns from the same
  #   node set so that they are guaranteed to have the same length and order.
  entries <- read_html(url) %>%
    html_nodes("table abbr")

  tibble("street_name" = html_text(entries),
         "description" = html_attr(entries, "title"))
}

# Dictionary pages to scrape: the "#" sign followed by every lower-case letter
#   of the alphabet.
pages <- c("#", letters)

# Scrape each page with get_slang() and stack the per-page tibbles into a
#   single table holding every slang term and its description.
noslang_raw <- pages %>%
  purrr::map(get_slang) %>%
  dplyr::bind_rows()


usethis::use_data(noslang_raw, overwrite = TRUE)

Finding Drug Names

In the No Slang dataset, noslang_raw (which was scraped from the NoSlang website), there is a description variable which contains both drug names and other phrases, for things like amounts. For the DOPE package we extracted the drug names. To do this, a dataset called ns was created that contains the unique words/phrases in description. The code below adds indicator variables beginning with “d_”. That set of variables was created by first checking for drugs that were mentioned in the DEA files. The total number of known drugs in each description was tallied. The records that contained 0 drug names were manually checked, and new “d_” variables were added if the description was for a drug. The new drugs mentioned by NoSlang.com are marked with “# NS”. There are some misspellings and slang words in the description variable. They appear in a |-separated list in the str_detect() calls below.


# add note to use singular instead of plural
# add note to use common abbreviations (lsd) vs long names

# Development aid: the distinct lower-cased descriptions, i.e. a shorter list
#   of phrases to check when looking for drug names.
ns <- data.frame(
  description = noslang_raw$description %>%
    tolower() %>%
    unique()
)

# The analysis file keeps every scraped row instead and only lower-cases the
#   description; this assignment replaces the one above.
ns <- mutate(noslang_raw, description = tolower(description))

# Add one indicator variable per drug ("d_" prefix): 1 if any of the
#   |-separated spellings appears in the description, 0 otherwise.  `known`
#   then holds the number of indicators set in each row; it is used to drop
#   phrases that do not contain drug names.
# Drugs that are new with NoSlang.com (not in the DEA files) are marked "# NS".
checkForDrugs <- ns %>%
  mutate(
    d_2cb = as.numeric(str_detect(description, "2cb|nexus")),  # NS
    d_alphaEt = as.numeric(str_detect(description,
                                      "alpha-ethyltryptamine")),  # NS
    d_alprazolam = as.numeric(str_detect(description, "xanax")),
    d_amphetamine = as.numeric(str_detect(description, "amphetamine|speed")),
    d_amt = as.numeric(str_detect(description,
                                  "alpha-methyltryptamine")),  # NS
    d_amobarbital = as.numeric(str_detect(description, "amobarbital")),  # NS
    d_amylNitrite = as.numeric(str_detect(description, "amyl nitrite")),  # NS
    d_barbiturates = as.numeric(str_detect(description, "barbiturate")),
    d_bathSalts = as.numeric(str_detect(description, "bath salts")),
    d_benzodiazepines = as.numeric(str_detect(description,
      "benzodiazepine|benzodiazipines")),
    d_clonazepam = as.numeric(str_detect(description, "klonopin")),
    d_cocaine = as.numeric(str_detect(description, "cocaine|coke|coccaine")),
    d_codeine = as.numeric(str_detect(description, "codeine")),
    d_crack = as.numeric(str_detect(description, "crack")),
    d_dextromethorphan = as.numeric(str_detect(description,
      "dextromethorphan|coricidin|cortison")),  # NS
    d_diazepam = as.numeric(str_detect(description, "valium")),  # NS
    d_dmt = as.numeric(str_detect(description, "dimethyltryptamine")),  # NS
    d_fentanyl = as.numeric(str_detect(description, "fentanyl")),
    d_flakka = as.numeric(str_detect(description, "flakka")),
    d_gbl = as.numeric(str_detect(description, "gbl")),  # NS
    d_ghb = as.numeric(str_detect(description, "ghb|gamma hydroxybutyrate")),
    d_heroin = as.numeric(str_detect(description, "heroin|herion")),
    d_hydrocodone = as.numeric(str_detect(description,
      "hydrocodone|vicodin|lortab|loratab")),
    d_hydromorphone = as.numeric(str_detect(description,
      "hydromorphone|diluadid")),
    d_inhalants = as.numeric(str_detect(description, "inhalant")),
    d_isobutylNitrite = as.numeric(str_detect(description,
                                              "isobutyl nitrite")),  # NS
    d_ketamine = as.numeric(str_detect(description, "ketamine")),
    d_khat = as.numeric(str_detect(description, "khat")),
    d_kratom = as.numeric(str_detect(description, "kratom")),
    d_lsd = as.numeric(str_detect(description,
      "lsd|lysergic acid diethylamide")),
    d_marijuana = as.numeric(str_detect(description,
      "marijuana|marijuna|cannabis|marajuana|weed|marijauna|maihuana|cannibus|hashish|hasish|blunt|tetrahydrocannabinol|joint|panama red")),
    d_mdma = as.numeric(str_detect(description,
                                   "mdma|ecstacy|ecxtasy|ecstasy")),
    d_mescaline = as.numeric(str_detect(description, "peyote|mescaline")),
    d_methadone = as.numeric(str_detect(description, "methadone")),
    d_methamphetamine = as.numeric(str_detect(description,
      "methamphetamine|crystal myth|crystal rock of meth|methamphetimine|crystal meth")),
    d_methcathinone = as.numeric(str_detect(description,
                                            "methcathinone")),  # NS
    d_methaqualone = as.numeric(str_detect(description,
                                           "methaqualone")),  # NS
    d_methylphenidate = as.numeric(str_detect(description, "ritalin")),
    d_morphine = as.numeric(str_detect(description, "morphine|morophine")),
    d_mushrooms = as.numeric(str_detect(description, "mushroom")),
    d_nitrous = as.numeric(str_detect(description, "nitrous oxide")),  # NS
    d_opium = as.numeric(str_detect(description, "opium")),
    d_oxycodone = as.numeric(str_detect(description,
                                        "oxycodone|oxycontin|oxycotin")),
    # capitalization needs to match dea_factsheets_plus
    d_pcp = as.numeric(str_detect(description, "pcp|phencyclidine")),
    d_psilocybin = as.numeric(str_detect(description, "psilocybin")),
    d_rohypnol = as.numeric(str_detect(description, "rohypnol")),
    d_salviaDivinorum = as.numeric(str_detect(description,
                                              "salvia divinorum")),
    d_spice = as.numeric(str_detect(description, "spice")),
    d_steroids = as.numeric(str_detect(description,
                                       "steroids|steriods|steroid")),
    d_u47700 = as.numeric(str_detect(description, "u-47700"))
  ) %>%
  # tally the indicators within each row ...
  rowwise() %>%
  mutate(known = sum(c_across(starts_with("d_")))) %>%
  # ... then drop the row-wise grouping so that it does not carry over into
  #   the objects derived from checkForDrugs
  ungroup()
  # use this for development
  # %>% select(description, known, everything())

# Keep the rows in which at least one drug was found, drop the tally, and
#   recode descriptions that consist solely of a misspelling, brand name or
#   slang term to the drug name.  Every other description is left unchanged.
ns_drugs <- checkForDrugs %>%
  filter(known > 0) %>%
  select(-known) %>%
  mutate(
    description = case_when(
      description == "nexus" ~ "2cb",
      description == "speed" ~ "amphetamine",
      # the target used to be spelled "benzodiazipine", i.e. a misspelling
      #   was recoded to another misspelling
      description == "benzodiazipines" ~ "benzodiazepine",
      description %in% c("coke", "coccaine") ~ "cocaine",
      description %in% c("coricidin", "cortison") ~ "dextromethorphan",
      description == "gamma hydroxybutyrate" ~ "ghb",
      description %in% c("vicodin", "lortab", "loratab") ~ "hydrocodone",
      description == "herion" ~ "heroin",
      description == "lysergic acid diethylamide" ~ "lsd",
      description %in% c("marijuna", "cannabis", "marajuana", "weed",
                         "marijauna", "maihuana", "cannibus", "hashish",
                         "hasish", "blunt", "tetrahydrocannabinol", "joint",
                         "panama red") ~ "marijuana",
      description %in% c("ecstacy", "ecxtasy", "ecstasy") ~ "mdma",
      description == "peyote" ~ "mescaline",       # need to fix in DEA
      description %in% c("crystal myth", "crystal rock of meth",
                         "crystal meth",
                         "methamphetimine") ~ "methamphetamine",
      description == "morophine" ~ "morphine",
      description %in% c("oxycontin", "oxycotin") ~ "oxycodone",
      description == "phencyclidine" ~ "pcp",
      description %in% c("steriods", "steroids") ~ "steroid",
      TRUE ~ description
    )
  )

# Crack is counted as crack only: clear the cocaine indicator wherever the
#   crack indicator is set (e.g. "crack cocaine").
ns_drugs$d_cocaine <- replace(ns_drugs$d_cocaine, ns_drugs$d_crack > 0, 0)

# Methamphetamine is counted as methamphetamine only: the word contains
#   "amphetamine", so clear the amphetamine indicator wherever the
#   methamphetamine indicator is set.
ns_drugs$d_amphetamine <- replace(ns_drugs$d_amphetamine,
                                  ns_drugs$d_methamphetamine > 0, 0)

# MDMA is counted as mdma only: clear both the amphetamine and the
#   methamphetamine indicators wherever the mdma indicator is set.
# NOTE(review): d_mdma is set by "mdma|ecstacy|ecxtasy|ecstasy" only, so a
#   description that spells out methylenedioxymethamphetamine without one of
#   those words is not covered by this step - confirm whether that is intended.
ns_drugs$d_amphetamine <- replace(ns_drugs$d_amphetamine,
                                  ns_drugs$d_mdma > 0, 0)
ns_drugs$d_methamphetamine <- replace(ns_drugs$d_methamphetamine,
                                      ns_drugs$d_mdma > 0, 0)

# Reshape the indicators to long format: one row per (street name, drug)
#   pair, keeping only the drugs whose indicator is set.
noslang_street_names <- ns_drugs %>%
  pivot_longer(cols = starts_with("d_"),
               names_to = "drug",
               values_to = "values",
               names_prefix = "d_") %>%
  filter(values > 0) %>%
  select(-values) %>%
  mutate(
    # spell out the drug names that were abbreviated or camel-cased in the
    #   indicator variable names
    drug = case_when(drug == "alphaEt" ~ "alpha-ethyltryptamine",
                     drug == "amylNitrite" ~ "amyl nitrite",
                     drug == "bathSalts" ~ "bath salts",
                     drug == "isobutylNitrite" ~ "isobutyl nitrite",
                     drug == "nitrous" ~ "nitrous oxide",
                     drug == "salviaDivinorum" ~ "salvia divinorum",
                     TRUE ~ drug),
    # drop the "(spanish)" tag, then trim the whitespace that surrounded it so
    #   that no street name is left with leading or trailing blanks
    street_name = str_trim(str_remove(street_name, "\\(spanish\\)"))
  ) %>%
  # this entry is excluded from the street names
  filter(!street_name %in% c("are you anywhere?"))


# TODO: fix the street names that contain "/"; they can be listed with
#   noslang_street_names %>% filter(str_detect(street_name, "/"))


# TODO: add talwin

# Store noslang_street_names with usethis::use_data(); overwrite = TRUE
#   replaces a previously stored copy.
usethis::use_data(noslang_street_names, overwrite = TRUE)