repo: tlswrap
action: commit
revision: 
path_from: 
revision_from: 635dc8e5a0cf4939952f4fc29c6f511aaaeed981:
path_to: 
revision_to: 
git.thebackupbox.net
tlswrap
git clone git://git.thebackupbox.net/tlswrap
commit 635dc8e5a0cf4939952f4fc29c6f511aaaeed981
Author: epoch 
Date:   Wed Jun 18 06:03:57 2025 +0000

    put PIDs into all of the syslog messages, and attempting a new method of cleaning up children processes that uses prctl. likely broken on BSD now though.

diff --git a/tlswrap.c b/tlswrap.c
index eebc30968b76a003858aaec17958cc96e9e12889..
index ..a12f353485f21ead0929c6edcea9cfcc6f3dcb42 100644
--- a/tlswrap.c
+++ b/tlswrap.c
@@ -13,6 +13,8 @@
 #include 
 #include 

+#include  // prctl() ofc
+
 #include 

 #ifndef NONTLS
@@ -26,7 +28,7 @@
 #include "ja3.h"
 #endif

-//#define TRAFFIC_LOGGING "1.2.3.4" // use this to debug exact request and response data going on. no timestamps inside yet.
+//#define TRAFFIC_LOGGING "21.41.41.4" // use this to debug exact request and response data going on. no timestamps inside yet.
 //#define FORCE_SNI //whether SNI is required to connect to this server.

 const char *SSL_error_string(int e) {
@@ -232,6 +234,7 @@ int verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx) {

 void signal_handler(int sig) {
   if(sig == SIGTERM) { // closing these fds should make it break out of the select loop and cleanup
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: received signal: %d closing([0,1,2])",getpid(),sig);
     close(0);
     close(1);
     close(2);
@@ -239,11 +242,13 @@ void signal_handler(int sig) {
 }

 int main(int argc,char *argv[]) {
+  // found this prctl trick on https://jmmv.dev/2019/11/wait-for-process-group-linux.html
+  prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
   setvbuf(stdin, NULL, _IONBF, 0);
   setvbuf(stdout, NULL, _IONBF, 0);
   setvbuf(stderr, NULL, _IONBF, 0);
   signal(SIGTERM,signal_handler);
-  syslog(LOG_DAEMON|LOG_DEBUG,"started");
+  syslog(LOG_DAEMON|LOG_DEBUG,"%d: started",getpid());
   struct sockaddr_in6 sa6;
   char ra[NI_MAXHOST],rp[NI_MAXSERV];
   char sa[NI_MAXHOST],sp[NI_MAXSERV];
@@ -335,7 +340,7 @@ int main(int argc,char *argv[]) {
   method=TLS_server_method();
   ctx = SSL_CTX_new(method);
   if(!ctx) {
-    syslog(LOG_DAEMON|LOG_ERR,"could not create new SSL context");
+    syslog(LOG_DAEMON|LOG_ERR,"%d: could not create new SSL context",getpid());
     return 1;
   }
 #endif
@@ -352,11 +357,11 @@ int main(int argc,char *argv[]) {
   SSL_CTX_set_ecdh_auto(ctx, 1);

   if(SSL_CTX_use_certificate_chain_file(ctx, cert_chain_file) <= 0) {
-    syslog(LOG_DAEMON|LOG_ERR,"failed to load cert chain file: %s",cert_chain_file);
+    syslog(LOG_DAEMON|LOG_ERR,"%d: failed to load cert chain file: %s",getpid(),cert_chain_file);
     return 1;
   }
   if(SSL_CTX_use_PrivateKey_file(ctx, priv_key_file, SSL_FILETYPE_PEM) <= 0) {
-    syslog(LOG_DAEMON|LOG_ERR,"failed to load private key file: %s",priv_key_file);
+    syslog(LOG_DAEMON|LOG_ERR,"%d: failed to load private key file: %s",getpid(),priv_key_file);
     return 1;
   }

@@ -381,11 +386,11 @@ int main(int argc,char *argv[]) {
       if(errno == 0) {
         //The SSL_ERROR_SYSCALL with errno value of 0 indicates unexpected EOF from the peer.
         //ignore this error by default. not really that interesting.
-        syslog(LOG_DAEMON|LOG_NOTICE,"%s -> %s SSL_accept() failed. %s",ru,su,strerror(errno));
+        syslog(LOG_DAEMON|LOG_NOTICE,"%d: %s -> %s SSL_accept() failed. %s",getpid(),ru,su,strerror(errno));
         return 1;
       }
       if(errno == 104) { //connection reset by peer. also not interesting.
-        syslog(LOG_DAEMON|LOG_NOTICE,"%s -> %s SSL_accept() failed. %s",ru,su,strerror(errno));
+        syslog(LOG_DAEMON|LOG_NOTICE,"%d: %s -> %s SSL_accept() failed. %s",getpid(),ru,su,strerror(errno));
         return 1;
       }
     }
@@ -402,7 +407,8 @@ int main(int argc,char *argv[]) {
       default: priority=LOG_ERR; break;
     }

-    syslog(LOG_DAEMON|priority,"%s -> %s SSL_accept() failed: %s (%d) / %s (%d)",
+    syslog(LOG_DAEMON|priority,"%d: %s -> %s SSL_accept() failed: %s (%d) / %s (%d)",
+      getpid(),
       ru,
       su,
       ERR_error_string(err_err,NULL),
@@ -410,7 +416,7 @@ int main(int argc,char *argv[]) {
       SSL_error_string(ssl_err),
       ssl_err
     );
-    //syslog(LOG_DAEMON|LOG_NOTICE,"%s -> %s SSL_accept() failed. %s",ru,su,ERR_error_string(ERR_get_error(),NULL));
+    //syslog(LOG_DAEMON|LOG_NOTICE,"%d: %s -> %s SSL_accept() failed. %s",getpid(),ru,su,ERR_error_string(ERR_get_error(),NULL));
     //fprintf(stderr,"SSL_accept() failed. %s\n",ERR_lib_error_string(SSL_get_error(ssl,err)));
     return 1;
   }
@@ -423,18 +429,25 @@ int main(int argc,char *argv[]) {
   }

   if(client_cert(ssl)) {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s -> %s we were provided a client cert!",ru,su);
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: %s -> %s we were provided a client cert!",getpid(),ru,su);
   } else {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s -> %s no client cert provided",ru,su);
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: %s -> %s no client cert provided",getpid(),ru,su);
   }
 #endif /* #ifndef NONTLS */
   //fprintf(stderr,"made it here\n");
-  syslog(LOG_DAEMON|LOG_DEBUG,"accepted a connection!");
+  syslog(LOG_DAEMON|LOG_DEBUG,"%d: accepted a connection!",getpid());
   char buffer[65535];//fuck it. let's make it big.

+  char command_line[1024]="";
+  int i=0;
+  for(i=0;argv[i];i++) {
+    strcat(command_line,argv[i]);
+    if(argv[i+1]) strcat(command_line," ");
+  }
+
   int child=fork();
   if(child == 0) {
-    setpgid(0,0);
+    setpgid(0,0); // this sets the process group to the PID of the child, which the parent uses to clean up.
     x=dup(0);
     dup2(a[0],0);
     dup2(b[1],1);
@@ -445,19 +458,24 @@ int main(int argc,char *argv[]) {
     close(b[0]);
     close(c[0]);
     close(c[1]);
+    closelog();// try to close the syslog right, so if it has fd 3, because we're dup the socket into it, syslog will reopen on something else
+    close(3);// just make sure it is gone. don't care if it didn't exist before...
     dup2(x,3);//we're passing this to the child ONLY so it can do getpeername and stuff. this can probably be closed.
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: running child process: %s",getpid(),command_line);
+
     execv(argv[0],argv);
+
+    syslog(LOG_DAEMON|LOG_ERR,"%d: failed to run child process: %s",getpid(),command_line);
+    syslog(LOG_DAEMON|LOG_ERR,"%d: reason: %s",getpid(),strerror(errno));
     printf("[!!!] server-side tlswrap configuration error.\n");
     printf("[!!!] failed to execute subprocess.\n");
-    for(;*argv;argv++) {
-      printf("[!!!] argv: %s\n",*argv);
-    }
     return 0;
   }
   if(child == -1) {
-    syslog(LOG_DAEMON|LOG_WARNING,"failed to fork");
+    syslog(LOG_DAEMON|LOG_WARNING,"%d: failed to fork",getpid());
     return 1;
   }
+  syslog(LOG_DAEMON|LOG_DEBUG,"%d: spawned child: %d",getpid(),child);
   //fprintf(stderr,"made it here\n");
   int j;
   int r1;
@@ -476,7 +494,7 @@ int main(int argc,char *argv[]) {
   fdmax=b[0]>c[0]?b[0]:c[0];
   struct timeval orig_timeout;
   struct timeval timeout;
-  orig_timeout.tv_sec=60;
+  orig_timeout.tv_sec=60;// we give it a WHOLE MINUTE to get done with stuff...
   orig_timeout.tv_usec=0;// 1/100th of a second. (10ms) sound good?
   close(a[0]);
   close(b[1]);
@@ -499,9 +517,9 @@ int main(int argc,char *argv[]) {
 #endif
     umask(0077);
     request_log=open(request_log_filename,O_WRONLY|O_CREAT,0666);
-    syslog(LOG_DAEMON|LOG_ERR,"open error: %s",strerror(errno));
+    syslog(LOG_DAEMON|LOG_ERR,"%d: open error: %s",getpid(),strerror(errno));
     response_log=open(response_log_filename,O_WRONLY|O_CREAT,0666);
-    syslog(LOG_DAEMON|LOG_ERR,"open error: %s",strerror(errno));
+    syslog(LOG_DAEMON|LOG_ERR,"%d: open error: %s",getpid(),strerror(errno));
   }
 #endif
   //fprintf(stderr,"made it here\n");
@@ -512,44 +530,44 @@ int main(int argc,char *argv[]) {
     errorfs=master;
     timeout=orig_timeout;
     if((j=select(fdmax+1,&readfs,0,&errorfs,&timeout)) == -1 ) {
-      syslog(LOG_DAEMON|LOG_ERR,"giving up. error'd in select");
+      syslog(LOG_DAEMON|LOG_ERR,"%d: giving up. error'd in select",getpid());
       break;
     }
     if(j == 0) { // we hit the timeout
-      syslog(LOG_DAEMON|LOG_ERR,"giving up. timed out in select");
+      syslog(LOG_DAEMON|LOG_ERR,"%d: giving up. timed out in select",getpid());
       break;
     }

-    if(FD_ISSET(0,&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"select: stdin error");
-    if(FD_ISSET(b[0],&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"select: b[0] error");
-    if(FD_ISSET(c[0],&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"select: c[0] error");
+    if(FD_ISSET(0,&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"%d: select: stdin error",getpid());
+    if(FD_ISSET(b[0],&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"%d: select: b[0] error",getpid());
+    if(FD_ISSET(c[0],&errorfs)) syslog(LOG_DAEMON|LOG_ERR,"%d: select: c[0] error",getpid());

     // reads from the network
     if(FD_ISSET(0,&readfs)) {
 #ifndef NONTLS
       if((r1=SSL_read(ssl,buffer,sizeof(buffer))) <= 0) {
-        syslog(LOG_DAEMON|LOG_DEBUG,"network done. %d msg: %s",r1,ERR_error_string(ERR_get_error(),NULL));
+        syslog(LOG_DAEMON|LOG_DEBUG,"%d: network done. %d msg: %s",getpid(),r1,ERR_error_string(ERR_get_error(),NULL));
 #else
       if((r1=read(0,buffer,sizeof(buffer))) <= 0) {
-        syslog(LOG_DAEMON|LOG_DEBUG,"network done. %d msg: %s",r1,strerror(errno));
+        syslog(LOG_DAEMON|LOG_DEBUG,"%d: network done. %d msg: %s",getpid(),r1,strerror(errno));
 #endif
         FD_CLR(0,&master);
 	close(a[1]);
       } else {
 #ifndef NONTLS
-        syslog(LOG_DAEMON|LOG_DEBUG,"network read? %d msg: %s",r1,ERR_error_string(ERR_get_error(),NULL));
+        //syslog(LOG_DAEMON|LOG_DEBUG,"network read? %d msg: %s",r1,ERR_error_string(ERR_get_error(),NULL));
 #else
-        syslog(LOG_DAEMON|LOG_DEBUG,"network read? %d msg: %s",r1,strerror(errno));
+        //syslog(LOG_DAEMON|LOG_DEBUG,"network read? %d msg: %s",r1,strerror(errno));
 #endif
-        syslog(LOG_DAEMON|LOG_DEBUG,"read %d bytes from network!",r1);
+        //syslog(LOG_DAEMON|LOG_DEBUG,"read %d bytes from network!",r1);
 #ifdef TRAFFIC_LOGGING
         // also write to log file.
         if(request_log != -1 && (r3=write(request_log,buffer,r1)) < 0) {
-          syslog(LOG_DAEMON|LOG_ERR,"%s -> %s request_log write failed. -_- %d",ru,su,r3);
+          syslog(LOG_DAEMON|LOG_ERR,"%d: %s -> %s request_log write failed. -_- %d",getpid(),ru,su,r3);
         }
 #endif
         if((r3=write(a[1],buffer,r1)) < 0) {
-          syslog(LOG_DAEMON|LOG_ERR,"%s -> %s a write failed. -_- %d",ru,su,r3);
+          syslog(LOG_DAEMON|LOG_ERR,"%d: %s -> %s a write failed. -_- %d",getpid(),ru,su,r3);
           FD_CLR(0,&master);
         }
       }
@@ -557,22 +575,22 @@ int main(int argc,char *argv[]) {
     // reads from the subprocess's stdout
     if(FD_ISSET(b[0],&readfs)) {
       if((r2=read(b[0],buffer,sizeof(buffer))) <= 0) {
-        syslog(LOG_DAEMON|LOG_DEBUG,"subprocess stdout done.");
+        syslog(LOG_DAEMON|LOG_DEBUG,"%d: subprocess stdout done.",getpid());
         FD_CLR(b[0],&master);
 	close(b[0]);
       } else {
-        syslog(LOG_DAEMON|LOG_DEBUG,"read %d bytes from subprocess!",r2);
+        //syslog(LOG_DAEMON|LOG_DEBUG,"read %d bytes from subprocess!",r2);
 #ifdef TRAFFIC_LOGGING
         if(response_log != -1 && (r3=write(response_log,buffer,r2)) < 0) {
-          syslog(LOG_DAEMON|LOG_ERR,"%s -> %s response_log write failed. -_- %d",ru,su,r3);
+          syslog(LOG_DAEMON|LOG_ERR,"%d: %s -> %s response_log write failed. -_- %d",getpid(),ru,su,r3);
         }
 #endif
 #ifndef NONTLS
         if((r3=SSL_write(ssl,buffer,r2)) <= 0) {
-          syslog(LOG_DAEMON|LOG_ERR,"%s -> %s network write had an error: %d %s",ru,su,r3,ERR_error_string(ERR_get_error(),NULL));
+          syslog(LOG_DAEMON|LOG_DEBUG,"%d: %s -> %s network write had an error: %d %s",getpid(),ru,su,r3,ERR_error_string(ERR_get_error(),NULL));
 #else
 	if((r3=write(1,buffer,r2)) <= 0) {
-          syslog(LOG_DAEMON|LOG_ERR,"%s -> %s network write had an error: %d %s",ru,su,r3,strerror(errno));
+          syslog(LOG_DAEMON|LOG_DEBUG,"%d: %s -> %s network write had an error: %d %s",getpid(),ru,su,r3,strerror(errno));
 #endif
           // if where we are writing to doesn't work anymore,
           // we should close where we are reading from so select() will break out.
@@ -582,13 +600,13 @@ int main(int argc,char *argv[]) {
     }
     if(FD_ISSET(c[0],&readfs)) {
       if((r2=read(c[0],buffer,sizeof(buffer)-1)) <= 0) {
-        syslog(LOG_DAEMON|LOG_DEBUG,"subprocess stderr done.");
+        syslog(LOG_DAEMON|LOG_DEBUG,"%d: subprocess stderr done.",getpid());
         FD_CLR(c[0],&master);
       } else {
         //write(2,buffer,r2);
         buffer[r2]=0;//gotta null this off sice we're passing to something that expects a string.
         //fprintf(stderr,"%s",buffer);
-        syslog(LOG_DAEMON|LOG_WARNING,"%s -> %s stderr of [%s] : %s",ru,su,argv[0],buffer);
+        syslog(LOG_DAEMON|LOG_WARNING,"%d: %s -> %s stderr of [%s] : %s",getpid(),ru,su,argv[0],buffer);
       }
     }
   }
@@ -611,26 +629,39 @@ int main(int argc,char *argv[]) {

   int status;
   char *url=getenv("REMOTE_URL");
-  if(kill(-child,SIGCONT)) { //if the process hasn't exited already, let's tell them they've been hungup on.
-    syslog(LOG_DAEMON|LOG_WARNING,"%s: killpg(%d,SIGCONT): %s",url,getpid(),strerror(errno));
-  }
-  sleep(1);
-  if(waitpid(child,&status,WNOHANG) == child) {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s: child process exited after a SIGCONT and a 5s wait. gonna send a HUP to everything else anyway.",url);
-    // this is fine. no need to ERR it.
-  }
-  if(kill(-child,SIGHUP)) {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s: killpg: %s",url,strerror(errno));
-  }
   sleep(1);
-  if(kill(-child,SIGTERM)) {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s: killpg: %s",url,strerror(errno));
-  }
-  sleep(1);
-  if(kill(-child,SIGKILL)) {
-    syslog(LOG_DAEMON|LOG_DEBUG,"%s: killpg: %s",url,strerror(errno));
+  pid_t pid;
+
+  // SIGKILL is in here twice so we get an extra waitpid() to run after we SIGKILL the first time and we should then bail out from it.
+  int s[]={SIGCONT,SIGHUP,SIGTERM,SIGKILL,SIGKILL,0};
+  for(i=0;s[i];i++) {
+    sleep(3);
+    // loop while we have descendent processes that have exited and clean them up.
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> waitpid() for -%d ...",getpid(),url,child);
+    while((pid=waitpid(-child,&status,WNOHANG)) > 0) { // and if they exit normally, that's good...
+      syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> a descendent process %d exited. exitstatus: %d",getpid(),url,pid,WEXITSTATUS(status));
+      if(WEXITSTATUS(status) != 0) {
+        syslog(LOG_DAEMON|LOG_ERR,"%d: <%s> a descendent process %d exited, but with 'error' exitstatus: %d",getpid(),url,pid,WEXITSTATUS(status));
+      }
+    }
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> out of waitpid() for -%d loop",getpid(),url,child);
+    // some error happened. likely ran out of descendents.
+    if(pid < 0) {
+      syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> waitpid error: %s (%d)",getpid(),url,strerror(errno),errno);
+      if(errno == 10) {
+        syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> likely means there aren't any descendents left. bailing.",getpid(),url);
+        break;
+      } else {
+        syslog(LOG_DAEMON|LOG_ERR,"%d: <%s> not sure how waitpid failed this way: %s (%d)",getpid(),url,strerror(errno),errno);
+      }
+    }
+    // I want to see what happens if we SIGSTOP a process...
+    syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> kill(-%d,%d) ...",getpid(),url,child,s[i]);
+    if(kill(-child,s[i]) != 0) { // 0 is success with kill()
+      syslog(LOG_DAEMON|LOG_WARNING,"%d: <%s> kill(-%d,%d): %s (%d)",getpid(),url,child,s[i],strerror(errno),errno);
+    }
   }
-  // we will end up here all of the time anyway, no need for me to see these messages constantly.
-  syslog(LOG_DAEMON|LOG_DEBUG,"%s: sent signals to all processes in our process group. they should be DEAD",url);
+
+  syslog(LOG_DAEMON|LOG_DEBUG,"%d: <%s> exiting now. hopefully everything got cleaned up.",getpid(),url);
   return 0;
 }

-----END OF PAGE-----